import os
import pandas as pd
import csv
import numpy as np
import math
from matplotlib.lines import Line2D
import matplotlib as mpl
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import random
from scipy.spatial import distance
from scipy import stats
from sklearn import manifold
from sklearn.decomposition import PCA
import statsmodels.stats.multitest as sm
This file includes the results of a microbiome analysis performed on samples taken from four individuals that were originally used to determine the “Impact of DNA source on genetic variant detection from human whole-genome sequencing data”.
This included blood, saliva and buccal samples taken from four individuals (blood samples were taken at a different time than saliva and buccal samples). Additionally, a methylation-based enrichment for eukaryotic DNA was performed on the saliva and buccal samples.
Fastq.gz files were downloaded from the ENA database, project accession number PRJNA523344
Kneaddata was used for quality control and removal of human sequences. This included:
- Trimmomatic 0.39: “SLIDINGWINDOW:4:20 MINLEN:50”
- Bowtie2 with the GRCh38_PhiX database (to remove human and PhiX reads): “–fast –dovetail”
parallel -j 2 --link 'kneaddata -i {1} -i {2} -o kneaddata_out/ \
-db /home/shared/bowtiedb/GRCh38_PhiX --trimmomatic /home/robyn/Trimmomatic-0.39/ \
-t 40 --trimmomatic-options "SLIDINGWINDOW:4:20 MINLEN:50" \
--bowtie2-options "--fast --dovetail" --remove-intermediate-output' \
::: raw_data/*_1.fastq.gz ::: raw_data/*_2.fastq.gz
mkdir kneaddata_out/contam_seq
mv kneaddata_out/*_contam*.fastq kneaddata_out/contam_seq
kneaddata_read_count_table --input kneaddata_out --output kneaddata_read_counts.txt
concat_paired_end.pl -p 4 --no_R_match -o cat_reads kneaddata_out/*_paired_*.fastq
#3
#set up colors function (to get up to 120 colors, but with up to 40 unique colors)
def get_cols(num):
colormap_20, colormap_40b, colormap_40c = mpl.cm.get_cmap('tab20', 256), mpl.cm.get_cmap('tab20b', 256), mpl.cm.get_cmap('tab20c', 256)
norm, norm2 = mpl.colors.Normalize(vmin=0, vmax=19), mpl.colors.Normalize(vmin=20, vmax=39)
m1, m2, m3 = mpl.cm.ScalarMappable(norm=norm, cmap=colormap_20), mpl.cm.ScalarMappable(norm=norm, cmap=colormap_40b), mpl.cm.ScalarMappable(norm=norm2, cmap=colormap_40c)
colors_20 = [m1.to_rgba(a) for a in range(20)]
colors_40 = [m2.to_rgba(a) for a in range(20)]+[m3.to_rgba(a) for a in range(20,40)]
if num < 21: return colors_20
elif num < 41: return colors_40
else: return colors_40+colors_40+colors_40
#and colors and shapes for different participants and body sites
colors_dict, shapes_dict = {'Blood':'#900C3F', 'Saliva':'#016F85', 'Buccal':'#ff8300', 'Saliva_euk':'#02aed1', 'Buccal_euk':'#FFC300'}, {'Huref':'o', 'PGPC-0002':'^', 'PGPC-0005':'*', 'PGPC-0006':'s', 'PGPC-0050':'p'}
#4
#get numbers of reads for different steps
reads = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/read_counts.txt', sep='\t', index_col=3, header=0)
participant_dict, site_dict, full_name_dict = {}, {}, {}
samples = list(reads.index.values)
for s in samples:
participant_dict[s] = reads.loc[s, 'Participant']
site_dict[s] = reads.loc[s, 'Body site']
full_name_dict[s] = reads.loc[s, 'Participant']+' '+reads.loc[s, 'Body site']
total_reads = pd.DataFrame(reads.loc[:, 'cat_reads'])
sample_names = [participant_dict[name]+' '+site_dict[name] for name in samples]
colors = [colors_dict[s] for s in list(reads.loc[:, 'Body site'].values)]
shapes = [shapes_dict[s] for s in list(reads.loc[:, 'Participant'].values)]
#5
plt.figure(figsize=(10, 5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
plt.sca(ax1)
plt.bar(list(reads.index.values), reads.loc[:, 'Percentage'].values, color=colors, edgecolor='k')
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.ylabel('Reads kept (%)')
plt.xlim([-0.5,20.5])
plt.sca(ax2)
plt.bar(list(reads.index.values), reads.loc[:, 'Percentage'].values, color=colors, edgecolor='k')
plt.semilogy()
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.ylabel('Log reads kept (%)')
handles = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
ax2.legend(handles=handles, bbox_to_anchor=(1.4,1.05))
plt.xlim([-0.5,20.5])
plt.tight_layout()
plt.show()
#6
plt.figure(figsize=(10, 5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
plt.sca(ax1)
plt.bar(list(reads.index.values), reads.loc[:, 'cat_reads'].values, color=colors, edgecolor='k')
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.xlim([-0.5,20.5])
plt.ylabel('Reads remaining')
plt.sca(ax2)
plt.bar(list(reads.index.values), reads.loc[:, 'cat_reads'].values, color=colors, edgecolor='k')
plt.semilogy()
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.ylabel('Log reads remaining')
handles = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
ax2.legend(handles=handles, bbox_to_anchor=(1.4,1.05))
plt.xlim([-0.5,20.5])
plt.tight_layout()
plt.show()
#7
reads_remain = reads.loc[:, ['Percentage', 'cat_reads']].rename(index=full_name_dict, columns = {'cat_reads':'Number'})
#8
py$reads_remain %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "350px", height = "400px")
| Percentage | Number | |
|---|---|---|
| Huref Blood | 0.2319067 | 1087311 |
| PGPC-0002 Blood | 0.5748318 | 3321664 |
| PGPC-0005 Blood | 0.5149286 | 1903482 |
| PGPC-0006 Blood | 0.7146067 | 3593441 |
| PGPC-0050 Blood | 0.6870164 | 2940390 |
| PGPC-0002 Saliva | 3.9289510 | 16561343 |
| PGPC-0005 Saliva | 55.6458826 | 244058333 |
| PGPC-0006 Saliva | 13.1608121 | 60497393 |
| PGPC-0050 Saliva | 56.3700720 | 257049004 |
| PGPC-0002 Buccal | 2.8112226 | 11462781 |
| PGPC-0005 Buccal | 2.8202504 | 12751614 |
| PGPC-0006 Buccal | 5.1521560 | 22434503 |
| PGPC-0050 Buccal | 2.4297107 | 10667011 |
| PGPC-0002 Saliva_euk | 1.2333408 | 5345888 |
| PGPC-0005 Saliva_euk | 8.3104464 | 38138382 |
| PGPC-0006 Saliva_euk | 1.9067564 | 8786635 |
| PGPC-0050 Saliva_euk | 5.6187272 | 25037552 |
| PGPC-0002 Buccal_euk | 0.9385856 | 4380036 |
| PGPC-0005 Buccal_euk | 1.2119820 | 5584415 |
| PGPC-0006 Buccal_euk | 1.5567291 | 7232280 |
| PGPC-0050 Buccal_euk | 1.3836141 | 6382288 |
The taxonomy has been profiled using:
1. HUMAnN
- HUMAnN2 and MetaPhlAn2
- HUMAnN3 and MetaPhlAn3
2. Kraken2 with Bracken
- GTDB (no confidence parameter set) - using the database constructed using Struo, release 89
- GTDB (confidence = 0.1)
- Minikraken v1 (no human genome, no confidence parameter set)
- Minikraken v1 (no human genome, confidence = 0.1)
- Minikraken v2 (with human genome, no confidence parameter set)
- Minikraken v2 (with human genome, confidence = 0.1)
- RefSeq Complete v93 (no confidence parameter set)
- RefSeq Complete v93 (confidence = 0.1)
Commands run:
humann2_databases --download chocophlan full humann_database/
humann2_databases --download uniref uniref90_diamond humann_database/
# Do this for each of uniref_90 and uniref_50
parallel -j 4 'humann2 --threads 10 --input {} --output humann2_out_90/{/.} --protein-database humann_database/uniref_90/' ::: cat_reads/*fastq
mkdir humann2_log_90
python
import os
samples = os.listdir('humann2_out_90')
for sample in samples:
os.system('cp humann2_out_90/'+sample+'/'+sample+'_humann2_temp/'+sample+'.log humann2_log_90/')
quit()
mkdir humann2_final_out_90
humann2_join_tables -s --input humann2_out_90/ --file_name pathabundance --output humann2_final_out_90/humann2_pathabundance.tsv
humann2_join_tables -s --input humann2_out_90/ --file_name pathcoverage --output humann2_final_out_90/humann2_pathcoverage.tsv
humann2_join_tables -s --input humann2_out_90/ --file_name genefamilies --output humann2_final_out_90/humann2_genefamilies.tsv
mkdir humann2_final_out_90/bugs_lists/
python
import os
samples = os.listdir('humann2_out_90')
for sample in samples:
os.system('cp humann2_out_90/'+sample+'/'+sample+'_humann2_temp/'+sample+'_metaphlan_bugs_list.tsv humann2_final_out_90/bugs_lists/')
quit()
merge_metaphlan_tables.py humann2_final_out_90/bugs_lists/*tsv > humann2_final_out_90/metaphlan_merged.tsv
rm -r humann2_final_out_90/bugs_lists
cd humann2_final_out_90/
humann2_renorm_table --input humann2_pathabundance.tsv --units relab --output humann2_pathabundance_relab.tsv
humann2_split_stratified_table --input humann2_pathabundance_relab.tsv --output ./
humann2_renorm_table --input humann2_genefamilies.tsv --units relab --output humann2_genefamilies_relab.tsv
humann2_split_stratified_table --input humann2_genefamilies_relab.tsv --output ./
pip install humann
pip install metaphlan
humann_databases --download chocophlan full humann_databases/humann3_databases/ --update-config yes
humann_databases --download uniref uniref90_diamond humann_databases/humann3_databases/ --update-config yes
humann_databases --download uniref uniref50_diamond humann_databases/humann3_databases/ --update-config yes
humann_databases --download utility_mapping full humann_databases/humann3_databases/ --update-config yes
wget https://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
tar xvf diamond-linux64.tar.gz
parallel -j 1 'humann --input {} --output human_metagenome/humann3_out/ --threads 12' ::: human_metagenome/cat_reads/*.fastq
merge_metaphlan_tables.py humann_final_out/bugs_lists/*tsv > humann3_final_out/metaphlan_merged.tsv
rm -r humann3_final_out/bugs_lists
humann_join_tables -s --input humann3_out/ --file_name pathabundance --output humann3_final_out/humann3_pathabundance.tsv
humann_join_tables -s --input humann3_out/ --file_name pathcoverage --output humann3_final_out/humann3_pathcoverage.tsv
humann_join_tables -s --input humann3_out/ --file_name genefamilies --output humann3_final_out/humann3_genefamilies.tsv
humann_renorm_table --input humann3_pathabundance.tsv --units relab --output humann3_pathabundance_relab.tsv
humann_split_stratified_table --input humann3_pathabundance_relab.tsv --output ./
humann_renorm_table --input humann3_genefamilies.tsv --units relab --output humann3_genefamilies_relab.tsv
humann_split_stratified_table --input humann3_genefamilies_relab.tsv --output ./
find . -name \*aligned.sam -type f -delete
find . -name \*unaligned.fa -type f -delete
find . -name \*aligned.tsv -type f -delete
humann_renorm_table --input humann3_genefamilies.tsv --output humann3_genefamilies_cpm.tsv --units cpm --update-snames
humann_regroup_table --input humann3_genefamilies_cpm.tsv --output humann3_genefamilies_cpm_ko_50.tsv --groups uniref50_ko
humann_regroup_table --input humann3_genefamilies_cpm.tsv --output humann3_genefamilies_cpm_ko_90.tsv --groups uniref90_ko
#GTDB
wget http://ftp.tue.mpg.de/ebio/projects/struo/GTDB_release89/kraken2/
#RefSeq complete
sudo mount -t ramfs none /scratch/ramdisk/
sudo cp -a $DBNAME /ramdisk
sudo cp -a /home/shared/Kraken2.0.8_Bracken150mer_RefSeqCompleteV93/ /scratch/ramdisk/
#For each database (with and without adding --confidence 0.1:
parallel -j 1 'kraken2 --use-names --threads 10 --db gtdb/kraken2_struo/ --fastq-input {} --report kraken2_out/{/.}.report > kraken2_out/{/.}.kraken' ::: human_metagenome/cat_reads/*.fastq
parallel -j 1 'bracken -d gtdb/kraken2_struo/ -i {} -l S -o {.}.bracken' ::: kraken2_report/*.kreport
#9
#get the taxonomy file and sort it to strain and genus level
taxa = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/metaphlan_merged.tsv', sep='\t', header=0, index_col=0)
tax_names = list(taxa.index.values)
keeping = []
for a in range(len(tax_names)):
if 't__' in tax_names[a]:
keeping.append(True)
elif 'unclassified' in tax_names[a]:
keeping.append(True)
else:
keeping.append(False)
strain = taxa.loc[keeping, :]
strain_names = list(strain.index.values)
strain_dict = {}
for i in range(len(strain_names)):
strain_dict[strain_names[i]] = strain_names[i].split('|s__')[0].split('|g__')[1]
genus = strain.rename(index=strain_dict)
genus = genus.groupby(by=genus.index, axis=0).sum()
#10
#define the function that calculates the nmds plots
def transform_for_NMDS(df, dist_met='braycurtis'):
X = df.iloc[0:].values
y = df.iloc[:,0].values
seed = np.random.RandomState(seed=3)
X_true = X
similarities = distance.cdist(X_true, X_true, dist_met)
mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
dissimilarity="precomputed", n_jobs=1)
#print(similarities)
pos = mds.fit(similarities).embedding_
nmds = manifold.MDS(n_components=2, metric=False, max_iter=3000, eps=1e-12,
dissimilarity="precomputed", random_state=seed, n_jobs=1,
n_init=1)
npos = nmds.fit_transform(similarities, init=pos)
# Rescale the data
pos *= np.sqrt((X_true ** 2).sum()) / np.sqrt((pos ** 2).sum())
npos *= np.sqrt((X_true ** 2).sum()) / np.sqrt((npos ** 2).sum())
# Rotate the data
clf = PCA()
X_true = clf.fit_transform(X_true)
pos = clf.fit_transform(pos)
npos = clf.fit_transform(npos)
return pos, npos, nmds.stress_
strain_t = strain.transpose()
genus_t = genus.transpose()
handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]
#11
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'braycurtis')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'braycurtis')
plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')
ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()
#12
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'euclidean')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'euclidean')
plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')
ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()
#13
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'jaccard')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'jaccard')
plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')
ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()
Here the relative abundance of taxa calulated by MetaPhlAn2 are plotted at the Kingdom level for each sample.
#14
plt.figure(figsize=(7,5))
ax1 = plt.subplot(111)
plt.bar(list(taxa.columns.values), taxa.loc['k__Viruses', :].values, color='#C70039', edgecolor='k')
plt.bar(list(taxa.columns.values), taxa.loc['k__Bacteria', :].values, bottom=taxa.loc['k__Viruses', :].values, color='#026B81', edgecolor='k')
#plt.xticks(list(taxa.columns.values), sample_names, rotation=90)
empty = []
for x in range(0,21):
empty.append('')
ax1.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
plt.xticks(range(0, 21), empty)
plt.xlim([-0.5,20.5])
handles = [Patch(facecolor='#C70039', edgecolor='k', label='Viruses'), Patch(facecolor='#026B81', edgecolor='k', label='Bacteria')]
plt.legend(handles=handles, bbox_to_anchor=(1,1.05))
plt.tight_layout()
plt.show()
Here the relative abundance of taxa calulated by MetaPhlAn2 are plotted at the Genus level for each sample. Genera with below 1% maximum relative abundance have been removed.
#15
genus = genus[genus.max(axis=1) > 1]
genera = list(genus.index.values)
plt.figure(figsize=(10,5))
ax1 = plt.subplot(111)
gen_colors = get_cols(len(genus.index.values))
handles = []
for g in range(len(genera)):
this_gen = genus.loc[genera[g], :].values
if g == 0:
ax1.bar(list(genus.columns.values), this_gen, color=gen_colors[g], edgecolor='k')
total = this_gen
else:
ax1.bar(list(genus.columns.values), this_gen, bottom=total, color=gen_colors[g], edgecolor='k')
total = this_gen+total
handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=genera[g]))
empty = []
for x in range(0,21):
empty.append('')
ax1.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
plt.xticks(range(0, 21), empty)
plt.xlim([-0.5,20.5])
plt.legend(handles=handles, bbox_to_anchor=(1,1.05), ncol=2)
plt.tight_layout()
plt.show()
#get the taxonomy files and sort them to strain and genus level
files = sorted(os.listdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/bugs_lists'))
all_genus, samples, all_strain = [], [], []
for f in files:
#if f != 'SRR8595490_metaphlan_bugs_list.tsv': continue
samples.append(f.split('_')[0])
fn = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/bugs_lists/'+f
taxa = pd.read_csv(fn, header=0, index_col=0, sep='\t')
taxa.drop(['additional_species', 'NCBI_tax_id'], axis=1, inplace=True)
taxa.rename(columns={'relative_abundance':f.split('_')[0]}, inplace=True)
if taxa.shape[0] == 1:
all_genus.append(taxa)
all_strain.append(taxa)
continue
tax_names = list(taxa.index.values)
keeping = []
unclassified, ind = [], []
for a in range(len(tax_names)):
if 's__' in tax_names[a]:
keeping.append(True)
elif 'UNKNOWN' in tax_names[a]:
keeping.append(True)
elif 'unclassified' in tax_names[a].split('__')[-1]:
keeping.append(True)
unclassified.append(tax_names[a])
ind.append(a)
else:
keeping.append(False)
for a in range(len(unclassified)):
for b in range(len(unclassified)):
if unclassified[a] in unclassified[b]:
keeping[ind[a]] = False
strain = taxa.loc[keeping, :]
all_strain.append(strain)
strain_names = list(strain.index.values)
strain_dict = {}
for i in range(len(strain_names)):
if 's__' in strain_names[i]:
strain_dict[strain_names[i]] = strain_names[i].split('|s__')[0].split('|g__')[1].replace('_', ' ')
else:
strain_dict[strain_names[i]] = strain_names[i].split('__')[1].replace('_', ' ')
genus = strain.rename(index=strain_dict)
genus = genus.groupby(by=genus.index, axis=0).sum()
all_genus.append(genus)
all_genus = pd.concat(all_genus).fillna(value=0)
all_genus = all_genus.groupby(by=all_genus.index, axis=0).sum()
all_strain = pd.concat(all_strain).fillna(value=0)
all_strain = all_strain.groupby(by=all_strain.index, axis=0).sum()
strain_t = all_strain.transpose()
genus_t = all_genus.transpose()
handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'braycurtis')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'braycurtis')
plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')
ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'euclidean')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'euclidean')
plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')
ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'jaccard')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'jaccard')
plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')
ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()
I haven’t included this plot for MetaPhlAn3 as all taxa (that are classified) are classified as bacteria. There are now no viruses in the output, but those samples that were all viral reads in MetaPhlAn2 are now all unclassified/‘unknown’.
Here the relative abundance of taxa calulated by MetaPhlAn3 are plotted at the Genus level for each sample. Genera with below 1% maximum relative abundance have been removed.
genus = all_genus[all_genus.max(axis=1) > 1]
genera = list(genus.index.values)
plt.figure(figsize=(10,5))
ax1 = plt.subplot(111)
gen_colors = get_cols(len(genus.index.values))
handles = []
for g in range(len(genera)):
this_gen = genus.loc[genera[g], :].values
if g == 0:
ax1.bar(list(genus.columns.values), this_gen, color=gen_colors[g], edgecolor='k')
total = this_gen
else:
ax1.bar(list(genus.columns.values), this_gen, bottom=total, color=gen_colors[g], edgecolor='k')
total = this_gen+total
if 'XIII' in genera[g]:
genera[g] = genera[g].replace('XIII', 'XIII\n')
handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=genera[g]))
empty = []
for x in range(0,21):
empty.append('')
ax1.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
plt.xticks(range(0, 21), empty)
plt.xlim([-0.5,20.5])
plt.legend(handles=handles, bbox_to_anchor=(1,1.05), ncol=2)
plt.tight_layout()
plt.show()
#16
#get all samples into dataframes based on the database that they use
folders = sorted(os.listdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2'))
del folders[0]
kraken_columns = {0:'Percent fragments clade', 1:'Number fragments clade', 2:'Number fragments taxon', 3:'Level', 4:'NCBI ID', 5:'Taxon name'}
kraken_all_db, bracken_all_db, all_domains = [], [], {}
for fol in folders:
if fol == 'db_genera' or fol == 'db_genera_in_common':
continue
if not os.path.isdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol):
continue
bracken, kraken, bracken_kreport = [], [], []
bracken_pd, kraken_pd = [], []
for fi in sorted(os.listdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol)):
if fi[-7:] == 'bracken':
bracken.append(fi)
elif fi[-7:] == 'kreport' and 'bracken' not in fi:
kraken.append(fi)
elif fi[-7:] == 'kreport':
bracken_kreport.append(fi)
for bk in bracken_kreport:
with open('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol+'/'+bk, 'rU') as f:
bk = []
domains = {}
this_domain, domain_name = [], ''
for row in csv.reader(f, delimiter='\t'):
bk.append(row)
row[5] = row[5].lstrip()
if row[3] == 'D':
if domain_name != '':
domains[domain_name] = this_domain
this_domain, domain_name = [], row[5]
else:
if row[3] != 'R' and row[3] != 'U' and 'D' not in row[3]:
this_domain.append(row[5])
domains[domain_name] = this_domain
for domain in domains:
if domain in all_domains:
all_domains[domain] = list(set(all_domains[domain]+domains[domain]))
else:
all_domains[domain] = list(set(domains[domain]))
for b in bracken:
if len(b) > 22:
continue
sample = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol+'/'+b, sep='\t', header=0, index_col=0)
b = b.replace('_150', '')
sample.drop(['taxonomy_id', 'taxonomy_lvl', 'kraken_assigned_reads', 'added_reads', 'fraction_total_reads'], axis=1, inplace=True)
sample.rename(columns={'new_est_reads':b[:-8]}, inplace=True)
bracken_pd.append(sample)
for k in kraken:
sample = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol+'/'+k, sep='\t', header=None, index_col=3)
sample = sample.loc[['U', 'D'], :]
sample = sample.rename(columns=kraken_columns).drop(['Number fragments taxon', 'NCBI ID'], axis=1).rename(columns={'Percent fragments clade':k[:-8]+'_percent', 'Number fragments clade':k[:-8]+'_reads'}).set_index('Taxon name')
taxa = list(sample.index.values)
taxa_dict = {}
for t in taxa:
taxa_dict[t] = t.replace(' ', '')
sample = sample.rename(index=taxa_dict)
kraken_pd.append(sample)
bracken = pd.concat(bracken_pd, join='outer')
kraken = pd.concat(kraken_pd, join='outer')
kraken = kraken.rename(index={'d__Bacteria':'Bacteria', 'd__Archaea':'Archaea'})
kraken = kraken.groupby(by=kraken.index, axis=0).sum()
bracken = bracken.groupby(by=bracken.index, axis=0).sum().fillna(value=0)
kraken_all_db.append(kraken), bracken_all_db.append(bracken)
#17
x1 = [x for x in range(21)]
x2 = [x+0.3 for x in range(21)]
tax_plotting = ['Archaea', 'Bacteria', 'Eukaryota', 'Viruses', 'unclassified']
color_plotting = ['#EDBB99', '#5499C7', '#7DCEA0', '#F7DC6F', '#CCD1D1']
tax_paper = ['Bacteria', 'Eukaryota', 'Other', 'Unclassified']
color_paper = ['#5499C7', '#7DCEA0', '#CD6155', '#CCD1D1']
from_paper = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/from_paper.csv', header=0, index_col=0)
def get_summary_reads(kraken_db):
fig = plt.figure(figsize=(15,15))
ax1, ax2, ax3, ax4, ax5 = plt.subplot(321), plt.subplot(322), plt.subplot(323), plt.subplot(324), plt.subplot(325)
ax1.set_title('Minikraken V1\n(without human genome)'), ax2.set_title('Minikraken V2\n(with human genome)'), ax3.set_title('GTDB'), ax4.set_title('RefSeq Complete V93')
ax5.set_title('From paper')
ax_plot = [ax3, ax3, ax1, ax1, ax2, ax2, ax4, ax4]
x_plot = [x1, x2, x1, x2, x1, x2, x1, x2]
axs = [ax1, ax2, ax3, ax4, ax5]
for s in range(len(samples)):
if s == 0:
continue
bottom = 0
for t in range(len(tax_paper)):
ax5.bar(x1[s], from_paper.loc[tax_paper[t], samples[s]], bottom=bottom, color=color_paper[t], edgecolor='k', width=0.6)
bottom += from_paper.loc[tax_paper[t], samples[s]]
for db in range(len(kraken_db)):
ax_using = ax_plot[db]
x = x_plot[db]
db = kraken_db[db]
handles = []
for tax in range(len(tax_plotting)):
handles.append(Patch(facecolor=color_plotting[tax], edgecolor='k', label=tax_plotting[tax]))
tax = tax_plotting[tax]
if tax not in list(db.index.values):
db.loc[tax] = [0 for i in range(db.shape[1])]
handles.append(Patch(facecolor=color_paper[2], edgecolor='k', label='Other'))
db = db.fillna(value=0)
for s in range(len(samples)):
bottom = 0
for t in range(len(tax_plotting)):
prop = db.loc[tax_plotting[t], samples[s]+'_reads']
cat = total_reads.loc[samples[s], 'cat_reads']
prop = (prop/cat)*100
ax_using.bar(x[s], prop, bottom=bottom, color=color_plotting[t], edgecolor='k', width=0.3)
bottom += prop
ax2.legend(handles=handles, bbox_to_anchor=(1,1.05))
for ax in axs:
plt.sca(ax)
plt.xticks(x1, ['' for x in x1])
plt.ylim([0, 100])
plt.xlim([-0.5, 20.5])
for x in x1:
ax5.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
ax4.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
ax1.set_ylabel('Classified (%)'), ax3.set_ylabel('Classified (%)'), ax5.set_ylabel('Classified(%)')
#plt.tight_layout()
return
def get_summary_bacteria(kraken_db):
tax_plotting = ['Bacteria']
alpha = ['#5499C7', '#F1C40F', '#5499C7', '#F1C40F', '#5499C7', '#F1C40F', '#5499C7', '#F1C40F']
fig = plt.figure(figsize=(10,8))
ax1, ax2, ax3, ax4 = plt.subplot(221), plt.subplot(222), plt.subplot(223), plt.subplot(224)
ax1.set_title('Minikraken V1\n(without human genome)'), ax2.set_title('Minikraken V2\n(with human genome)'), ax3.set_title('GTDB'), ax4.set_title('RefSeq Complete V93')
ax_plot = [ax3, ax3, ax1, ax1, ax2, ax2, ax4, ax4]
x_plot = [x1, x2, x1, x2, x1, x2, x1, x2]
axs = [ax1, ax2, ax3, ax4]
for db in range(len(kraken_db)):
ax_using = ax_plot[db]
x = x_plot[db]
alp = alpha[db]
db = kraken_db[db]
for tax in range(len(tax_plotting)):
tax = tax_plotting[tax]
if tax not in list(db.index.values):
db.loc[tax] = [0 for i in range(db.shape[1])]
db = db.fillna(value=0)
for s in range(len(samples)):
bottom = 0
for t in range(len(tax_plotting)):
prop = db.loc[tax_plotting[t], samples[s]+'_reads']
cat = total_reads.loc[samples[s], 'cat_reads']
ax_using.bar(x[s], prop, bottom=bottom, color=alp, edgecolor='k', width=0.3)
bottom += prop
handles = []
handles.append(Patch(facecolor=alpha[0], edgecolor='k', label='No confidence value'))
handles.append(Patch(facecolor=alpha[1], edgecolor='k', label='Confidence=0.1'))
ax2.legend(handles=handles, bbox_to_anchor=(1.6,1.03))
for ax in axs:
plt.sca(ax)
plt.xticks(x1, ['' for x in x1])
plt.semilogy()
plt.xlim([-0.5, 20.5])
#plt.ylim([0, 100])
plt.xlim([-0.5, 20.5])
for x in x1:
pl = ((1/21)*(x+1))-0.02
ax3.text(pl, -0.03, sample_names[x], color=colors[x], rotation=90, va='top', ha='center', transform=ax3.transAxes)
ax4.text(pl, -0.03, sample_names[x], color=colors[x], rotation=90, va='top', ha='center', transform=ax4.transAxes)
ax1.set_ylabel('Number of reads'), ax3.set_ylabel('Number of reads')
#plt.tight_layout()
return
A summary of the percentage of reads classified as different domains with different databases. Note that the ‘From paper’ plot uses the classifications given in the original paper, where 10,000 unmapped reads were classified using BLAST searches of the NCBI database.
#18
get_summary_reads(kraken_all_db)
plt.show()
Summary of the number of reads that are classified as bacteria by each database.
#19
get_summary_bacteria(kraken_all_db)
plt.tight_layout()
plt.show()
These first plots are all separately with the confidence parameter set. See the last tab for those without the confidence parameter set.
#20
db_names = ['gtdb', 'gtdb_conf', 'minikraken', 'minikraken_conf', 'minikraken_human', 'minikraken_human_conf', 'refseq', 'refseq_conf']
bacteria = all_domains['Bacteria']+all_domains['d__Bacteria']
genera, gen_names, genera_1, gen_names_1, strain, gen_sums = [], [], [], [], [], []
for db in range(len(bracken_all_db)):
d = int(db)
db = bracken_all_db[db]
species = list(db.index.values)
keeping = []
species_dict = {}
for sp in species:
if sp in bacteria:
keeping.append(True)
new_sp = sp.split('__')
if len(new_sp) > 1:
new_sp = new_sp[1]
else:
new_sp = new_sp[0]
species_dict[sp] = new_sp.split(' ')[0].replace("'", '')
else:
keeping.append(False)
in_len = db.shape[0]
db = db.loc[keeping, :]
strain.append(db)
db = db.rename(index=species_dict)
db = db.groupby(by=db.index, axis=0).sum()
sums = db.sum(axis=0)
gen_sums.append(sums)
db = db.divide(sums, axis=1).multiply(100)
genera.append(db)
db.to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/'+db_names[d]+'.csv')
gen_names = gen_names+list(db.index.values)
db = db[db.max(axis=1) > 1]
genera_1.append(db)
gen_names_1 = gen_names_1+list(db.index.values)
gen_names = list(set(gen_names))
gen_names_1 = list(set(gen_names_1))
#21
def plot_four_nmds(dbs, metric, name):
fig = plt.figure(figsize=(15,10))
#fig.suptitle(name+metric+'\n\n\n')
ax1, ax2, ax3, ax4 = plt.subplot(221), plt.subplot(222), plt.subplot(223), plt.subplot(224)
axs = [ax3, ax1, ax2, ax4]
ax1.set_title('Minikraken V1\n(without human genome)'), ax2.set_title('Minikraken V2\n(with human genome)'), ax3.set_title('GTDB'), ax4.set_title('RefSeq Complete V93')
for db in range(len(dbs)):
n = db
db = dbs[db].transpose()
pos, npos, stress = transform_for_NMDS(db, metric)
for a in range(len(npos)):
axs[n].scatter(npos[a,0], npos[a,1], marker=shapes[a], color=colors[a], s=100, edgecolor='k')
axs[n].set_xlabel('nMDS1')
axs[n].set_ylabel('nMDS2')
handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]
ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1,1))
plt.tight_layout()
return
#22
plot_four_nmds([strain[1], strain[3], strain[5], strain[7]], 'braycurtis', 'NMDS confidence=0.1 strain ')
plt.show()
#23
plot_four_nmds([genera[1], genera[3], genera[5], genera[7]], 'braycurtis', 'NMDS confidence=0.1 genera ')
plt.show()
#24
plot_four_nmds([strain[1], strain[3], strain[5], strain[7]], 'euclidean', 'NMDS confidence=0.1 strain ')
plt.show()
#25
plot_four_nmds([genera[1], genera[3], genera[5], genera[7]], 'euclidean', 'NMDS confidence=0.1 genera ')
plt.show()
#26
plot_four_nmds([strain[1], strain[3], strain[5], strain[7]], 'jaccard', 'NMDS confidence=0.1 strain ')
plt.show()
#27
plot_four_nmds([genera[1], genera[3], genera[5], genera[7]], 'jaccard', 'NMDS confidence=0.1 genera ')
plt.show()
Bray-curtis distance at strain level
#28
plot_four_nmds([strain[0], strain[2], strain[4], strain[6]], 'braycurtis', 'NMDS no confidence strain ')
plt.show()
Bray-curtis distance at genus level
#29
plot_four_nmds([genera[0], genera[2], genera[4], genera[6]], 'braycurtis', 'NMDS no confidence genera ')
plt.show()
Euclidean distance at strain level
#30
plot_four_nmds([strain[0], strain[2], strain[4], strain[6]], 'euclidean', 'NMDS no confidence strain ')
plt.show()
Euclidean distance at genus level
#31
plot_four_nmds([genera[0], genera[2], genera[4], genera[6]], 'euclidean', 'NMDS no confidence genera ')
plt.show()
Jaccard distance at strain level
#32
plot_four_nmds([strain[0], strain[2], strain[4], strain[6]], 'jaccard', 'NMDS no confidence strain ')
plt.show()
Jaccard distance at genus level
#33
plot_four_nmds([genera[0], genera[2], genera[4], genera[6]], 'jaccard', 'NMDS no confidence genera ')
plt.show()
These plots are now only calculated for the classifications that used confidence = 0.1. Genera with below 1% maximum relative abundance are removed and the numbers in brackets are the number of reads that were classified as bacteria.
#34
db_names = ['gtdb', 'gtdb_conf', 'minikraken', 'minikraken_conf', 'minikraken_human', 'minikraken_human_conf', 'refseq', 'refseq_conf']
#bacteria = all_domains['Bacteria']+all_domains['d__Bacteria']
#genera, gen_names, genera_1, gen_names_1, strain, gen_sums = [], [], [], [], [], []
gen_names_1 = sorted(gen_names_1)
def plot_genera(db, sums, tax_cols, gen_names_1, dname):
plt.figure(figsize=(10,5))
ax1 = plt.subplot(111)
bottom = [0 for x in range(len(db.columns))]
handles = []
for g in range(len(gen_names_1)):
if gen_names_1[g] in db.index.values:
this_row = db.loc[gen_names_1[g], :].values
ax1.bar(x1, this_row, bottom=bottom, color=tax_cols[g], edgecolor='k')
handles.append(Patch(facecolor=tax_cols[g], edgecolor='k', label=gen_names_1[g]))
for b in range(len(bottom)):
bottom[b] += this_row[b]
ax1.legend(handles=handles, bbox_to_anchor=(1, 1.03), ncol=3)
plt.xticks(x1, ['' for x in x1])
plt.ylabel('Relative abundance(%)')
plt.xlim([-0.5, 20.5])
plt.ylim([0, 100])
for x in x1:
n = str(int(sums[samples[x]]))
ax1.text(x, -2, sample_names[x]+' ('+n+')', color=colors[x], rotation=90, va='top', ha='center')
plt.tight_layout()
return
gen_plot = [genera_1[1], genera_1[3], genera_1[5], genera_1[7]]
db_name = ['GTDB', 'Minikraken V1', 'Minikraken V2', 'RefSeq Complete V93']
all_sums = [gen_sums[1], gen_sums[3], gen_sums[5], gen_sums[7]]
tax_cols = get_cols(len(gen_names_1))
#35
plot_genera(gen_plot[1], all_sums[1], tax_cols, gen_names_1, db_name[1])
plt.show()
#36
plot_genera(gen_plot[2], all_sums[2], tax_cols, gen_names_1, db_name[2])
plt.show()
#37
plot_genera(gen_plot[0], all_sums[0], tax_cols, gen_names_1, db_name[0])
plt.show()
#38
plot_genera(gen_plot[3], all_sums[3], tax_cols, gen_names_1, db_name[3])
plt.show()
Here I have carried out ANCOM2 tests for differential abundance of genera between body sites. All genera are then plotted on a heatmap with mean values for each body site and stars to denote significant differences as determined by ANCOM.
While many of these results vary between databases, it is worth noting that some differences are consistent, e.g.:
1. Prevotella are always more abundant in saliva samples
2. Streptococcus are always more abundant in buccal samples
3. Klebsiella (where present) are always more abundant in blood samples
#39
def get_differences(new_genus, db_name):
new_genus = new_genus.drop(['SRR8595488'], axis=1)
new_genus = new_genus[new_genus.max(axis=1) > 0.1]
samples = list(new_genus.columns)
list_comparisons = [['Blood', 'Saliva'], ['Blood', 'Buccal'], ['Saliva', 'Buccal'], ['Saliva', 'Saliva_euk'], ['Buccal', 'Buccal_euk'], ['Blood', 'Saliva_euk'], ['Blood', 'Buccal_euk'], ['Saliva_euk', 'Buccal_euk']]
comparisons, metadata, comp_len = [], [], []
for a in range(len(list_comparisons)):
keeping = []
this_md = []
for b in range(len(samples)):
if site_dict[samples[b]] in list_comparisons[a]:
keeping.append(True)
this_md.append([samples[b], site_dict[samples[b]]])
else:
keeping.append(False)
this_comp = new_genus.loc[:, keeping]
this_comp = this_comp[this_comp.max(axis=1) > 0.1]
comparisons.append(this_comp)
this_md = pd.DataFrame(this_md, columns=['Samples', 'Groups'])
#this_md.set_index('Samples', inplace=True)
metadata.append(this_md)
comp_len.append(this_comp.shape[0])
new_genus = new_genus.rename(columns=site_dict, inplace=False)
return comparisons, metadata, new_genus, comp_len
db_name = ['GTDB', 'Minikraken V1', 'Minikraken V2', 'RefSeq Complete V93']
comparison_names = [r'Blood vs saliva', r'Blood vs buccal', r'Saliva vs buccal', r'Saliva vs saliva_euk', r'Buccal vs buccal_euk', r'Blood vs saliva_euk', r'Blood vs buccal_euk', r'Saliva_euk vs buccal_euk']
source("/Users/robynwright/Documents/OneDrive/Github/R-notebooks/ancom_v2.1.R")
get_ancom <- function(ft, md) {
all_ancom = list()
for (a in 1:8){
feature_table = ft[a]
meta_data = md[a]
process = feature_table_pre_process(feature_table, meta_data, 'Samples', 'Groups', lib_cut=10, neg_lb=TRUE)
ancom_out = ANCOM(process$feature_table, process$meta_data, process$struc_zero, main_var='Groups')
all_ancom[[a]] <- ancom_out$out
}
return(all_ancom)
}
def sort_ancom_results(r_ancom):
ancom_lists_09, ancom_lists_08, ancom_lists_07, ancom_lists_06 = [], [], [], []
for a in range(len(r_ancom)):
this_sig_09, this_sig_08, this_sig_07, this_sig_06 = [], [], [], []
r_ancom[a].set_index('taxa_id', inplace=True)
all_sp = list(r_ancom[a].index.values)
for b in range(len(all_sp)):
if r_ancom[a].loc[all_sp[b], 'detected_0.9'] == True:
this_sig_09.append(all_sp[b])
if r_ancom[a].loc[all_sp[b], 'detected_0.8'] == True:
this_sig_08.append(all_sp[b])
if r_ancom[a].loc[all_sp[b], 'detected_0.7'] == True:
this_sig_07.append(all_sp[b])
if r_ancom[a].loc[all_sp[b], 'detected_0.6'] == True:
this_sig_06.append(all_sp[b])
ancom_lists_09.append(this_sig_09), ancom_lists_08.append(this_sig_08), ancom_lists_07.append(this_sig_07), ancom_lists_06.append(this_sig_06)
return [ancom_lists_09, ancom_lists_08, ancom_lists_07, ancom_lists_06]
def plot_heatmap(new_genus, ANCOM):
print(ANCOM)
names = ['Blood', 'Saliva', 'Buccal', 'Saliva_euk', 'Buccal_euk']
other_names = ['Abundance', r'Blood $vs$ saliva', r'Blood $vs$ buccal', r'Saliva $vs$ buccal', r'Saliva $vs$ saliva_euk', r'Buccal $vs$ buccal_euk', r'Blood $vs$ saliva_euk', r'Blood $vs$ buccal_euk', r'Saliva_euk $vs$ buccal_euk']
colormap, norm = mpl.cm.get_cmap('plasma', 256), mpl.colors.Normalize(vmin=0, vmax=1)
m = mpl.cm.ScalarMappable(norm=norm, cmap=colormap)
if list(new_genus.index.values)[0][0] == 'A':
new_genus = new_genus.iloc[::-1]
figure = plt.figure(figsize=(5,new_genus.shape[0]*0.2))
ax1 = plt.subplot(111)
genus_names = list(new_genus.index.values)
y = []
for g in range(len(genus_names)):
this_row = new_genus.loc[genus_names[g], :]
values = [(np.mean(this_row[name].values)) for name in names]
bottom, top, x = [g for a in range(5)], [1 for a in range(5)], [a for a in range(5)]
y.append(g+0.5)
ma = max(values)
values = [v/ma for v in values]
values = [m.to_rgba(v) for v in values]
ax1.bar(x, top, bottom=bottom, color=values, edgecolor='k', width=1)
x.append(5)
ax1.scatter(x[-1], bottom[-1]+0.5, color='k', s=ma*5)
sig, x_plt = [], x[-1]
for a in range(len(ANCOM)):
x_plt += 0.75
if genus_names[g] in ANCOM[a]: ax1.scatter(x_plt, bottom[-1]+0.5, marker='*', color='k')
x.append(x_plt)
plt.ylim([0, len(genus_names)]), plt.xlim([-0.5, x[-1]+0.5])
plt.yticks(y, genus_names)
plt.xticks(x, names+other_names, rotation=90)
ax1.xaxis.set_ticks_position('top')
plt.tight_layout()
plt.show()
return
These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.
this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/minikraken_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Comparison | Shared genera | ANCOM 0.6 | ANCOM 0.7 | ANCOM 0.8 | ANCOM 0.9 |
|---|---|---|---|---|---|
| Blood vs saliva | 41 | 41 | 37 | 26 | 8 |
| Blood vs buccal | 37 | 27 | 21 | 15 | 3 |
| Saliva vs buccal | 45 | 3 | 3 | 2 | 1 |
| Saliva vs saliva_euk | 41 | 0 | 0 | 0 | 0 |
| Buccal vs buccal_euk | 39 | 2 | 0 | 0 | 0 |
| Blood vs saliva_euk | 38 | 32 | 24 | 15 | 3 |
| Blood vs buccal_euk | 34 | 11 | 5 | 2 | 2 |
| Saliva_euk vs buccal_euk | 43 | 5 | 3 | 2 | 1 |
Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).
plot_heatmap(new_genus, ancom[0])
These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.
this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/minikraken_human_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Comparison | Shared genera | ANCOM 0.6 | ANCOM 0.7 | ANCOM 0.8 | ANCOM 0.9 |
|---|---|---|---|---|---|
| Blood vs saliva | 49 | 48 | 47 | 38 | 9 |
| Blood vs buccal | 48 | 35 | 16 | 14 | 4 |
| Saliva vs buccal | 39 | 5 | 4 | 1 | 0 |
| Saliva vs saliva_euk | 30 | 0 | 0 | 0 | 0 |
| Buccal vs buccal_euk | 37 | 0 | 0 | 0 | 0 |
| Blood vs saliva_euk | 49 | 47 | 33 | 22 | 5 |
| Blood vs buccal_euk | 50 | 19 | 14 | 12 | 4 |
| Saliva_euk vs buccal_euk | 42 | 4 | 3 | 3 | 2 |
Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).
plot_heatmap(new_genus, ancom[0])
These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.
this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/gtdb_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Comparison | Shared genera | ANCOM 0.6 | ANCOM 0.7 | ANCOM 0.8 | ANCOM 0.9 |
|---|---|---|---|---|---|
| Blood vs saliva | 98 | 88 | 82 | 32 | 6 |
| Blood vs buccal | 91 | 21 | 14 | 8 | 5 |
| Saliva vs buccal | 96 | 18 | 10 | 2 | 1 |
| Saliva vs saliva_euk | 93 | 3 | 2 | 0 | 0 |
| Buccal vs buccal_euk | 95 | 8 | 5 | 1 | 0 |
| Blood vs saliva_euk | 92 | 27 | 17 | 11 | 4 |
| Blood vs buccal_euk | 75 | 7 | 7 | 3 | 2 |
| Saliva_euk vs buccal_euk | 97 | 19 | 13 | 7 | 1 |
Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).
plot_heatmap(new_genus, ancom[0])
These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.
this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/refseq_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Comparison | Shared genera | ANCOM 0.6 | ANCOM 0.7 | ANCOM 0.8 | ANCOM 0.9 |
|---|---|---|---|---|---|
| Blood vs saliva | 85 | 81 | 77 | 35 | 11 |
| Blood vs buccal | 82 | 65 | 36 | 21 | 7 |
| Saliva vs buccal | 69 | 9 | 6 | 3 | 0 |
| Saliva vs saliva_euk | 64 | 0 | 0 | 0 | 0 |
| Buccal vs buccal_euk | 76 | 4 | 4 | 1 | 0 |
| Blood vs saliva_euk | 86 | 72 | 37 | 20 | 4 |
| Blood vs buccal_euk | 79 | 18 | 15 | 8 | 2 |
| Saliva_euk vs buccal_euk | 84 | 12 | 10 | 5 | 1 |
Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).
plot_heatmap(new_genus, ancom[0])
Now we are looking at the taxa (grouped to genus level, to hopefully allow for differences between databases) that are in common between the different databases that have been used with Kraken2. We are also using only the versions that have confidence=0.1.
The plots below here show the number of reads in each sample before and after removal of the genera that aren’t present in all databases.
#print(bracken_all_db)
def intersection(lst1, lst2):
lst3 = [value for value in lst1 if value in lst2]
return lst3
conf_bracken_genus = []
genus_in_each = []
for a in range(len(bracken_all_db)):
if a not in [0, 2, 4, 6]:
continue
this_db = pd.DataFrame(bracken_all_db[a])
taxa = list(this_db.index.values)
tax_dict = {}
for b in range(len(taxa)):
orig_tax = taxa[b]
if 's__' in taxa[b]:
taxa[b] = taxa[b].replace('s__', '')
taxa[b] = taxa[b].split(' ')[0]
taxa[b] = taxa[b].replace("'", "")
tax_dict[orig_tax] = taxa[b]
this_db.rename(index=tax_dict, inplace=True)
genus = this_db.groupby(by=this_db.index, axis=0).sum()
conf_bracken_genus.append(genus)
genus_in_each.append(list(genus.index.values))
[print(len(gen)) for gen in genus_in_each]
overall_genus = intersection(genus_in_each[0], genus_in_each[1])
overall_genus = intersection(overall_genus, genus_in_each[2])
overall_genus = intersection(overall_genus, genus_in_each[3])
print(len(overall_genus))
fig = plt.figure(figsize=(10,6))
ax = [plt.subplot(223), plt.subplot(221), plt.subplot(222), plt.subplot(224)]
x1 = [x for x in range(21)]
x2 = [x+0.4 for x in range(21)]
x3 = [x+0.2 for x in range(21)]
conf_bracken_overall = []
names = ['gtdb', 'minikrakenv1', 'minikrakenv2', 'refseq']
labels = ['GTDB', 'Minikraken v1 (without human)', 'Minikraken v2 (with human)', 'RefSeq Complete v93']
sum_reduced = []
for db in range(len(conf_bracken_genus)):
new_db = pd.DataFrame(conf_bracken_genus[db].loc[overall_genus, :])
conf_bracken_overall.append(new_db)
new_db.to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/'+names[db]+'_common.csv')
conf_bracken_genus[db].to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/'+names[db]+'.csv')
sums = new_db.sum(axis=0)
sum_reduced.append(sums)
ax[db].bar(x2, sums, color=colors, edgecolor='k', width=0.4, alpha=0.5)
sums = conf_bracken_genus[db].sum(axis=0)
ax[db].bar(x1, sums, color=colors, edgecolor='k', width=0.4)
ax[db].semilogy()
ax[db].set_title(labels[db])
plt.sca(ax[db])
if db == 1 or db == 2:
plt.xticks([])
else:
plt.xticks(x3, sample_names, rotation=90)
ax[0].set_ylabel('Number of reads'), ax[1].set_ylabel('Number of reads')
handles = [Patch(facecolor='k', edgecolor='k', label='All genera'), Patch(facecolor='k', edgecolor='k', alpha=0.5, label='Genera present in all')]
ax[2].legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()
fig = plt.figure(figsize=(8,4))
ax = plt.subplot(111)
x1 = [x*5 for x in range(21)]
x2 = [x+1 for x in x1]
x3 = [x+1 for x in x2]
x4 = [x+1 for x in x3]
xplt = [x+0.5 for x in x2]
x = [x3, x1, x2, x4]
al = [0.6, 1, 0.8, 0.4]
for db in range(len(sum_reduced)):
ax.bar(x[db], sum_reduced[db], color=colors, width=1, edgecolor='k', alpha=al[db])
handles = [Patch(facecolor='k', edgecolor='k', label='Minikraken v1'), Patch(facecolor='k', edgecolor='k', alpha=0.8, label='Minikraken v2'), Patch(facecolor='k', edgecolor='k', alpha=0.6, label='GTDB'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='RefSeq Complete v93')]
ax.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1))
plt.xticks(xplt, sample_names, rotation=90)
plt.semilogy()
plt.ylabel('Number of reads')
plt.tight_layout()
plt.show()
for db in range(len(conf_bracken_overall)):
rename_samples = {}
for sample in list(conf_bracken_overall[db].columns):
rename_samples[sample] = sample+'_'+names[db]
this_db = pd.DataFrame(conf_bracken_overall[db].rename(columns=rename_samples))
if db == 0:
overall_genus = this_db
else:
overall_genus = pd.concat([overall_genus, this_db], axis=1)
overall_genus = pd.DataFrame(overall_genus)
#overall_genus = pd.DataFrame(overall_genus.divide(overall_genus.sum(axis=0)).multiply(100))
overall_genus = overall_genus[overall_genus.max(axis=1) > 25000]
sums = overall_genus.sum(axis=0)
gen_colors = get_cols(overall_genus.shape[0])
print(overall_genus)
fig = plt.figure(figsize=(14,10))
ax1 = plt.subplot(334)
ax = [ax1, plt.subplot(331, sharey=ax1), plt.subplot(332, sharey=ax1), plt.subplot(335, sharey=ax1)]
ax[1].set_ylabel('Number of reads')
ax[0].set_ylabel('Number of reads')
x1 = [x for x in range(21)]
a = 0
col_samples = list(overall_genus.columns)
#line 20
new_db = []
for name in names:
keeping = []
for s in list(overall_genus.columns):
if name in s:
keeping.append(True)
else:
keeping.append(False)
other_new_db = pd.DataFrame(overall_genus.loc[:, keeping])
new_db.append(other_new_db)
for db in range(len(new_db)):
this_genera = list(new_db[db].index.values)
handles = []
for g in range(len(this_genera)):
if g == 0:
bottom = [0 for c in x1]
these_values = new_db[db].loc[this_genera[g], :].values
ax[db].bar(x1, these_values, bottom=bottom, color=gen_colors[g], edgecolor='k', width=1)
handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=this_genera[g]))
bottom = [bottom[x]+these_values[x] for x in range(len(bottom))]
plt.sca(ax[db])
if db == 1 or db == 2:
plt.xticks([])
else:
plt.xticks(x1, sample_names, rotation=90)
plt.semilogy()
ax[2].legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1), ncol=3)
#plt.tight_layout()
plt.show()
for db in range(len(conf_bracken_overall)):
rename_samples = {}
for sample in list(conf_bracken_overall[db].columns):
rename_samples[sample] = sample+'_'+names[db]
this_db = pd.DataFrame(conf_bracken_overall[db].rename(columns=rename_samples))
if db == 0:
overall_genus = this_db
else:
overall_genus = pd.concat([overall_genus, this_db], axis=1)
overall_genus = pd.DataFrame(overall_genus)
overall_genus = pd.DataFrame(overall_genus.divide(overall_genus.sum(axis=0)).multiply(100))
overall_genus = overall_genus[overall_genus.max(axis=1) > 1]
sums = overall_genus.sum(axis=0)
gen_colors = get_cols(overall_genus.shape[0])
print(overall_genus)
fig = plt.figure(figsize=(14,10))
ax1 = plt.subplot(334)
ax = [ax1, plt.subplot(331, sharey=ax1), plt.subplot(332, sharey=ax1), plt.subplot(335, sharey=ax1)]
ax[1].set_ylabel('Number of reads')
ax[0].set_ylabel('Number of reads')
x1 = [x for x in range(21)]
a = 0
col_samples = list(overall_genus.columns)
#line 20
new_db = []
for name in names:
keeping = []
for s in list(overall_genus.columns):
if name in s:
keeping.append(True)
else:
keeping.append(False)
other_new_db = pd.DataFrame(overall_genus.loc[:, keeping])
new_db.append(other_new_db)
for db in range(len(new_db)):
this_genera = list(new_db[db].index.values)
handles = []
for g in range(len(this_genera)):
if g == 0:
bottom = [0 for c in x1]
these_values = new_db[db].loc[this_genera[g], :].values
ax[db].bar(x1, these_values, bottom=bottom, color=gen_colors[g], edgecolor='k', width=1)
handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=this_genera[g]))
bottom = [bottom[x]+these_values[x] for x in range(len(bottom))]
plt.sca(ax[db])
if db == 1 or db == 2:
plt.xticks([])
else:
plt.xticks(x1, sample_names, rotation=90)
ax[2].legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1), ncol=3)
#plt.tight_layout()
plt.show()
Here, each sample is plotted using each of the four different databases, from top to bottom: GTDB, Minikraken v1, Minikraken v2 and RefSeq Complete v93. Each genera is plotted as a column, with black indicating that it is present and white indicating that it is absent.
Note that for some sample sets, e.g. saliva, there is far more consensus within samples than in other sample sets, e.g. blood.
count = 0
plt.figure(figsize=(6,6))
x, y, b0, b1, b2, b3 = [], [], [], [], [], []
for s in full_name_dict:
print(s)
ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
count += 1
nums, pres_colors = [], []
for db in conf_bracken_overall:
nums.append(list(db.loc[:, s].values))
pres_colors.append([])
for a in range(len(nums[0])):
if nums[0][a] > 0: pres_colors[0].append('k')
else: pres_colors[0].append('w')
if nums[1][a] > 0: pres_colors[1].append('k')
else: pres_colors[1].append('w')
if nums[2][a] > 0: pres_colors[2].append('k')
else: pres_colors[2].append('w')
if nums[3][a] > 0: pres_colors[3].append('k')
else: pres_colors[3].append('w')
if s == 'SRR8595488':
x.append(a), y.append(1), b0.append(0), b1.append(1), b2.append(2), b3.append(3)
ax.bar(x, y, bottom=b3, color=pres_colors[0], width=1)
ax.bar(x, y, bottom=b2, color=pres_colors[1], width=1)
ax.bar(x, y, bottom=b1, color=pres_colors[2], width=1)
ax.bar(x, y, bottom=b0, color=pres_colors[3], width=1)
plt.sca(ax)
plt.xticks([]), plt.yticks([2], [full_name_dict[s]])
plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 4])
plt.subplots_adjust(hspace=0.05)
plt.show()
Now I am combining the data from the 4 databases, and only plotting a genus in a sample if it is present in all 4 databases. Again, black indicates presence while white indicates absence.
count = 0
plt.figure(figsize=(6,6))
x, y = [], []
for s in full_name_dict:
ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
count += 1
nums, pres_colors = [], []
for db in conf_bracken_overall:
nums.append(list(db.loc[:, s].values))
for a in range(len(nums[0])):
if nums[0][a] > 0 and nums[1][a] > 0 and nums[2][a] > 0 and nums[3][a] > 0: pres_colors.append('k')
else: pres_colors.append('w')
if s == 'SRR8595488':
x.append(a), y.append(1)
ax.bar(x, y, color=pres_colors, width=1)
plt.sca(ax)
plt.xticks([]), plt.yticks([0.5], [full_name_dict[s]])
plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 1])
plt.subplots_adjust(hspace=0.05)
plt.show()
Now I am combining the data from the 4 databases, and only plotting a genus in a sample if it is present in 3 of 4 databases. Again, black indicates presence while white indicates absence.
count = 0
plt.figure(figsize=(6,6))
x, y = [], []
for s in full_name_dict:
ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
count += 1
nums, pres_colors = [], []
for db in conf_bracken_overall:
nums.append(list(db.loc[:, s].values))
for a in range(len(nums[0])):
this_num = [nums[0][a], nums[1][a], nums[2][a], nums[3][a]]
if sum(this_num) >= 3: pres_colors.append('k')
else: pres_colors.append('w')
if s == 'SRR8595488':
x.append(a), y.append(1)
ax.bar(x, y, color=pres_colors, width=1)
plt.sca(ax)
plt.xticks([]), plt.yticks([0.5], [full_name_dict[s]])
plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 1])
plt.subplots_adjust(hspace=0.05)
plt.show()
Now I am combining the data from the 4 databases, and only plotting a genus in a sample if it is present in 2 of 4 databases. Again, black indicates presence while white indicates absence.
count = 0
plt.figure(figsize=(6,6))
x, y = [], []
for s in full_name_dict:
ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
count += 1
nums, pres_colors = [], []
for db in conf_bracken_overall:
nums.append(list(db.loc[:, s].values))
for a in range(len(nums[0])):
this_num = [nums[0][a], nums[1][a], nums[2][a], nums[3][a]]
if sum(this_num) >= 2: pres_colors.append('k')
else: pres_colors.append('w')
if s == 'SRR8595488':
x.append(a), y.append(1)
ax.bar(x, y, color=pres_colors, width=1)
plt.sca(ax)
plt.xticks([]), plt.yticks([0.5], [full_name_dict[s]])
plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 1])
plt.subplots_adjust(hspace=0.05)
plt.show()
These plots show that almost all genera are present in saliva samples, while the other body sites just have subsets of these. There is also the largest amount of consensus between saliva samples, and the use of different databases to determine presence/absence. I will now include the genera that are present in 3 of 4 databases for further analyses.
These plots are looking at the differences between body sites based on presence/absence (using the taxa that are present in at least 3 of 4 databases). I have also removed the HuRef blood before carrying out any comparisons.
new_db = []
for s in full_name_dict:
if s == 'SRR8595488': continue
nums, presence = [], [s]
for db in conf_bracken_overall:
nums.append(list(db.loc[:, s].values))
for a in range(len(nums[0])):
this_num = [nums[0][a], nums[1][a], nums[2][a], nums[3][a]]
if sum(this_num) >= 3: presence.append(1)
else: presence.append(0)
new_db.append(presence)
new_db = pd.DataFrame(new_db, columns=['Samples']+list(conf_bracken_overall[0].index.values))
new_db.set_index('Samples', inplace=True)
new_db = new_db.transpose()
new_db.to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/combined_db_3of4.csv')
list_comparisons = [['Blood', 'Saliva'], ['Blood', 'Buccal'], ['Saliva', 'Buccal'], ['Saliva', 'Saliva_euk'], ['Buccal', 'Buccal_euk'], ['Blood', 'Saliva_euk'], ['Blood', 'Buccal_euk'], ['Saliva_euk', 'Buccal_euk']]
samples = list(new_db.columns)
comparisons, metadata, comp_len = [], [], []
for a in range(len(list_comparisons)):
keeping = []
this_md = []
for b in range(len(samples)):
if site_dict[samples[b]] in list_comparisons[a]:
keeping.append(True)
this_md.append([samples[b], site_dict[samples[b]]])
else:
keeping.append(False)
this_comp = new_db.loc[:, keeping]
comparisons.append(this_comp)
this_md = pd.DataFrame(this_md, columns=['Samples', 'Groups'])
#this_md.set_index('Samples', inplace=True)
metadata.append(this_md)
comp_len.append(this_comp.shape[0])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = metadata[0], metadata[1], metadata[2], metadata[3], metadata[4], metadata[5], metadata[6], metadata[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
comparison_names = [r'Blood vs saliva', r'Blood vs buccal', r'Saliva vs buccal', r'Saliva vs saliva_euk', r'Buccal vs buccal_euk', r'Blood vs saliva_euk', r'Blood vs buccal_euk', r'Saliva_euk vs buccal_euk']
for c in range(len(comparison_names)):
this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
This is the table of presence/absence used for the ANCOM tests.
pres_abs = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/combined_db_3of4.csv', header=0, index_col=0)
pres_abs.rename(columns=full_name_dict, inplace=True)
py$pres_abs %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| PGPC-0002 Blood | PGPC-0005 Blood | PGPC-0006 Blood | PGPC-0050 Blood | PGPC-0002 Saliva | PGPC-0005 Saliva | PGPC-0006 Saliva | PGPC-0050 Saliva | PGPC-0002 Buccal | PGPC-0005 Buccal | PGPC-0006 Buccal | PGPC-0050 Buccal | PGPC-0002 Saliva_euk | PGPC-0005 Saliva_euk | PGPC-0006 Saliva_euk | PGPC-0050 Saliva_euk | PGPC-0002 Buccal_euk | PGPC-0005 Buccal_euk | PGPC-0006 Buccal_euk | PGPC-0050 Buccal_euk | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Acaryochloris | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acetoanaerobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acetobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acetobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acetohalobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acetomicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acholeplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Achromobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidaminococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidianus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidiferrobacter | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Acidihalobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidilobus | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Acidimicrobium | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 |
| Acidiphilium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidipropionibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidithiobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acidobacterium | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Acidothermus | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |
| Aciduliprofundum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Acinetobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinoalloteichus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinomyces | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinoplanes | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinopolymorpha | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinopolyspora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinosynnema | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Actinotignum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Adlercreutzia | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Advenella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aequorivita | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aeribacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aerococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aeromicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aeromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Afipia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Agarilytica | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Agarivorans | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aggregatibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Agrobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Agrococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Agromyces | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ahniella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Akkermansia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alcaligenes | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alcanivorax | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Algibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Algoriphagus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alicycliphilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alicyclobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aliivibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alistipes | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alkalilimnicola | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 |
| Alkaliphilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alkalitalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Allochromatium | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Allofrancisella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Allokutzneria | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Altererythrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Alteromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aminobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aminobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aminomonas | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Ammonifex | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |
| Amphibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Amycolatopsis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anabaena | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anabaenopsis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anaerococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anaerolinea | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Anaeromyxobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anaerostipes | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anaerotignum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anaplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anderseniella | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aneurinibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Anoxybacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Antarctobacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aquabacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aquaspirillum | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Aquifex | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aquiflexum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aquimarina | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aquitalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arachidicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arcanobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Archaeoglobus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arcobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arcticibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arenibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aromatoleum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arsenicicoccus | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arthrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Arthrospira | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Asaia | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Asticcacaulis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Atopobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aurantimicrobium | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |
| Auraticoccus | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aureimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Aureitalea | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Auricoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Auritidibacter | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 |
| Austwickia | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 |
| Azorhizobium | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Azospira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Azospirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Azotobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bacterioplanes | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Bacteriovorax | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bacteroides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Barnesiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bartonella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Basilea | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bdellovibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Beggiatoa | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Beijerinckia | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Belliella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bernardetia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Beutenbergia | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bibersteinia | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bifidobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Blastochloris | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Blastococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Blastomonas | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Blattabacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Blautia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Blochmannia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bordetella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Borrelia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Borreliella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bosea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brachybacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brachyspira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bradymonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Bradyrhizobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brenneria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Breoghania | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brevefilum | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brevibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brevibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brevundimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brochothrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Brucella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Buchnera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Burkholderia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Butyrivibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldanaerobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldicellulosiruptor | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldilinea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldimicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldisericum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Calditerrivibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldithrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caldivirga | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Calothrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Campylobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Capnocytophaga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Carboxydocella | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Carboxydothermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cardinium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Carnobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Castellaniella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Catenovulum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Catenulispora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Caulobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cedecea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Celeribacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cellulomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cellulophaga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cellulosilyticum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cellulosimicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cellvibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chamaesiphon | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chania | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chelativorans | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chelatococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chitinophaga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chlamydia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chloracidobacterium | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Chlorobaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chlorobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chloroflexus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chloroherpeton | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Christensenella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chromobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chromohalobacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chroococcidiopsis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chryseobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chryseolinea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Chthonomonas | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Citrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Citromicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Clavibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cloacibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Clostridioides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Clostridium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cnuibacter | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Cobetia | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cohaesibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Collimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Collinsella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Colwellia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Comamonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Conexibacter | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Confluentimicrobium | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Congregibacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Coprothermobacter | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Coraliomargarita | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Corallococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Coriobacterium | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 |
| Corynebacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Coxiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Crenobacter | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 |
| Crinalium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Croceibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Croceicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cronobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cryobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cryptobacterium | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Cuniculiplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cupriavidus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Curtobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Curvibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cutibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cyanobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cyanobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cyanothece | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cyclobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cycloclasticus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cylindrospermum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cystobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Cytophaga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dechloromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Deferribacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Defluviimonas | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Defluviitoga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dehalobacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dehalobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dehalococcoides | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dehalogenimonas | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Deinococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Delftia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Denitrobacterium | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 1 |
| Denitrovibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dermabacter | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 |
| Dermacoccus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dermatophilus | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfarculus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfatibacillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfitobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfobacca | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfobacula | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfobulbus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfocapsa | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfohalobium | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Desulfomicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfomonile | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfosporosinus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfotalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Desulfotomaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfovibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfurella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfurispirillum | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Desulfurivibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfurobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfurococcus | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Desulfuromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Devosia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Devriesea | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dialister | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Diaphorobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dichelobacter | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dickeya | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dictyoglomus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dietzia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dinoroseobacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Dokdonella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dokdonia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dolichospermum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Draconibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dyadobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Dyella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Echinicola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ectothiorhodospira | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Edwardsiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Eggerthella | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ehrlichia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Eikenella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Elizabethkingia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Elusimicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Endomicrobium | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Endozoicomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ensifer | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Enterobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Enterococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Entomoplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epibacterium | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ereboglobus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Erwinia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Erysipelothrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Erythrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Escherichia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ethanoligenens | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Eubacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Euzebya | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Exiguobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ezakiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Faecalibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Faecalibaculum | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Faecalitalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fastidiosipila | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fermentimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ferrimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ferriphaselus | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ferroglobus | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ferroplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fervidobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fibrella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fibrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fictibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Filifactor | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Filimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fimbriimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Finegoldia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fischerella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Flagellimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Flammeovirga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Flavisolibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Flavivirga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Flavobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Flavonifractor | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Flexistipes | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fluviicola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Formosa | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Francisella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Frankia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Frateuria | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Friedmanniella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Frischella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Frondihabitans | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Fuerstia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Fusobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gallaecimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gallibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gallionella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geitlerinema | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gemella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geminocystis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gemmatimonas | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gemmatirosa | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gemmobacter | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geoalkalibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geodermatophilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geoglobus | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Geosporobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gibbsiella | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gilliamella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gillisia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gilvibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Glaciecola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Glaesserella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gloeobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gloeocapsa | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gloeomargarita | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Gluconacetobacter | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |
| Gluconobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Glutamicibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gordonia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gordonibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gottschalkia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gramella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Granulibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Granulicella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Granulosicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Grimontia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Gynuella | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Haematospirillum | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Haemophilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hafnia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hahella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halalkalicoccus | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Halanaeroarchaeum | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 |
| Halanaerobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Haliangium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halioglobus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Haliscomenobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Haloarcula | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halobacteriovorax | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halobacterium | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halobacteroides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halobiforma | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halocynthiibacter | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halodesulfurarchaeum | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 |
| Haloferax | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halogeometricum | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halohasta | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 0 |
| Halomicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Halomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halopenitus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halopiger | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Haloplanus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Haloquadratum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halorhabdus | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halorhodospira | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halorientalis | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halorubrum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halostagnicola | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Halotalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Haloterrigena | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halothece | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halothermothrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Halothiobacillus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hartmannibacter | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Helicobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Heliobacterium | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Herbaspirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Herbinix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Herminiimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hippea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hirschia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Histophilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hoeflea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hoyosella | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Humibacter | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Hydrogenobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hydrogenobaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hydrogenophaga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hydrogenovibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hymenobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hyperthermus | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 0 |
| Hyphomicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Hyphomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ichthyobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Idiomarina | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ignavibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ignicoccus | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 1 |
| Ilyobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Immundisolibacter | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 |
| Intestinimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Intrasporangium | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Isoptericola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Isosphaera | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 |
| Janibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Jannaschia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Janthinobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Jatrophihabitans | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Jeongeupia | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Jeotgalibaca | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Jeotgalibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Jeotgalicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Jiangella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Jonesia | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 1 |
| Jonquetella | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Kangiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ketobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ketogulonicigenium | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kibdelosporangium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kineococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kingella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kiritimatiella | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Kitasatospora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Klebsiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kluyvera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kocuria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Komagataeibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kordia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kosakonia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kosmotoga | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kozakia | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 |
| Kribbella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kurthia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kushneria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kutzneria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kyrpidia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Kytococcus | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Labrenzia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Laceyella | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lachnoanaerobaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lachnoclostridium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lacimicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lacinutrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lactobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lactococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lacunisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Laribacter | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lawsonella | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lawsonia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leadbetterella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leclercia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Legionella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leifsonia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leisingera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lelliottia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leminorella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lentibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lentzea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leptolyngbya | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leptospira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leptospirillum | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leptothrix | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Leptotrichia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Leuconostoc | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Libanicoccus | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Liberibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Limnobaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Limnochorda | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Limnohabitans | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Listeria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lonsdalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Luteibacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Luteimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Luteipulveratus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Luteitalea | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lutibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lysinibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Lysobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Macrococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mageeibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Magnetococcus | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Magnetospira | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Magnetospirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mahella | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mannheimia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Maribacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Maricaulis | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marichromatium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marinilactibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marinithermus | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marinitoga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marinobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marinobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marinomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marinovum | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Mariprofundus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Maritalea | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Marivirga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marivivens | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Marmoricola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Martelella | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Massilia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Megamonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Megasphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Meiothermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Melaminivora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Melioribacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Melissococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mesoplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mesorhizobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mesotoga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Metallosphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanobrevibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanocaldococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanocella | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanococcoides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanocorpusculum | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanoculleus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanofollis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanohalobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanohalophilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanolacinia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanolinea | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Methanolobus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanomethylovorans | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanoplanus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanopyrus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanoregula | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanosalsum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanosarcina | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanosphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanosphaerula | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Methanospirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanothermobacter | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanothermococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanothermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanothrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methanotorris | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylacidiphilum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylibium | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylobacillus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylocaldum | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methyloceanibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylocella | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylococcus | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Methylocystis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylomicrobium | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylophaga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylophilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylosinus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylotenera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methyloversatilis | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Methylovorus | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Methylovulum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Micavibrio | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microbacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microbulbifer | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microcella | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Micrococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microcoleus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microcystis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microlunatus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Micromonospora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Micropruina | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 1 |
| Microterricola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microvirga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Microvirgula | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Mitsuaria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mobiluncus | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Modestobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mogibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Monoglobus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Moorea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Moorella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Moraxella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Morganella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Moritella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mucilaginibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mucinivorans | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Murdochiella | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 |
| Muribaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Muricauda | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mycetocola | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mycobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mycobacteroides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mycolicibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mycolicibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Mycoplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Myroides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Myxococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nakamurella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Natranaerobius | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Natrialba | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Natrinema | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Natronobacterium | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 0 |
| Natronococcus | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Natronolimnobius | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Natronomonas | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nautilia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ndongobacter | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Negativicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Neisseria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Neoasaia | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Neomicrococcus | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Neorhizobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Neorickettsia | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Niabella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Niastella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitratifractor | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Nitratireductor | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitratiruptor | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitrosococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitrosomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitrososphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitrosospira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitrospira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nitrospirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Niveispirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nocardia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nocardioides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nocardiopsis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nodularia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nonlabens | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nonomuraea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Nostoc | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Novibacillus | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Novosphingobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oblitimonas | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oceanicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oceanimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oceanisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oceanithermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oceanobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ochrobactrum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Octadecabacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Odoribacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oenococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oleiphilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oleispira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oligella | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Olleya | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Olsenella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Opitutus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Orientia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ornithinimicrobium | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ornithobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Orrella | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oscillatoria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oscillibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ottowia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Owenweeksia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Oxalobacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paenalcaligenes | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paenarthrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paenibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paeniclostridium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paenisporosarcina | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pajaroellobacter | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Palaeococcus | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 1 |
| Paludibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paludisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pandoraea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pannonibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pantoea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Parabacteroides | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paraburkholderia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Parachlamydia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paracoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Parageobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paraglaciecola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paraliobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paraoerskovia | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Paraphotobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pararhodospirillum | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Parascardovia | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| Parvibaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Parvimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Parvularcula | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pasteurella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Paucibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pectobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pediococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pedobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pelagibacterium | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pelosinus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Peptoclostridium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Peptoniphilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Persephonella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Petrimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Petrotoga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Phaeobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Phenylobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Phoenicibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Photobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Photorhabdus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Phreatobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Phycicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Phycisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Phyllobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Picrophilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pirellula | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Piscirickettsia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Planctopirus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Planktothrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Planococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Plantibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Plesiomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pleurocapsa | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pluralibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Polaribacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Polaromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Polymorphum | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Polynucleobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pontibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pontimonas | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Porphyrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Porphyromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pragia | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Prauserella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Prevotella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Prochlorococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Propionibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Propionimicrobium | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Prosthecochloris | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Proteiniphilum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Proteus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Providencia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudanabaena | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudarcicella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudarthrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudoalteromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudodesulfovibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudogulbenkiania | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudohongiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudolabrys | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudonocardia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudopedobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudopropionibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudorhodoplanes | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudothermotoga | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Pseudovibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pseudoxanthomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Psychrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Psychroflexus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Psychromonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pusillimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pyrobaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pyrococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pyrodictium | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Pyrolobus | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Rahnella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ralstonia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ramlibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Raoultella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Raphidiopsis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rathayibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Reinekea | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Renibacterium | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Rhizobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhizobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodanobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodobaca | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodoferax | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodoluna | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Rhodomicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodopirellula | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodoplanes | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodopseudomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodospirillum | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodothermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rhodovulum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rickettsia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Riemerella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rivularia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Robiginitalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Roseateles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Roseburia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Roseibacterium | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Roseiflexus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Roseobacter | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Roseomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Roseovarius | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rothia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rubinisphaera | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |
| Rubrivivax | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rubrobacter | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 |
| Ruegeria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rufibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ruminiclostridium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Ruminococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Rummeliibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Runella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Saccharomonospora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Saccharophagus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Saccharopolyspora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Saccharospirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Saccharothrix | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sagittula | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salegentibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salimicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinarchaeum | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 |
| Salinibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinibacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinicola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinigranum | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Salinimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinispira | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinivibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salinivirga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salipiger | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Salmonella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sandaracinus | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sanguibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Saprospira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Scardovia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Scytonema | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sebaldella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sedimenticola | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sediminicola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sediminispirochaeta | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Segniliparus | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Selenomonas | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Seonamhaeicola | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Serinicoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Serratia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Shewanella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Shimwellia | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Shinella | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Siansivirga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sideroxydans | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Simiduia | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Simkania | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Simonsiella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sinomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sinorhizobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Slackia | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |
| Sneathia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Snodgrassella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sodalis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Solibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Solimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Solitalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sphaerobacter | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Sphaerochaeta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sphaerospermopsis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sphingobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sphingobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sphingomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sphingopyxis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sphingorhabdus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Spiribacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Spiroplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Spirosoma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Spongiibacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sporosarcina | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Stackebrandtia | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Stanieria | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Staphylococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Staphylothermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Stappia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Starkeya | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 |
| Stenotrophomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Steroidobacter | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 |
| Stigmatella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Streptacidiphilus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Streptobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Streptococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Streptomyces | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Streptosporangium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfitobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfolobus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfuricaulis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Sulfuricella | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfuricurvum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfuriferula | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 |
| Sulfurifustis | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 |
| Sulfurihydrogenibium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfurimonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfurisphaera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfuritalea | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfurospirillum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sulfurovum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Sutterella | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Symbiobacterium | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Synechococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Synechocystis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Syntrophobotulus | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Syntrophomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Syntrophothermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Syntrophus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tannerella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tateyamaria | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tatlockia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tatumella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Taylorella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tenacibaculum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tepidanaerobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Teredinibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Terribacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Terriglobus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tessaracoccus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tetragenococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thalassococcus | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thalassolituus | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thalassospira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thalassotalea | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thauera | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermacetogenium | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermaerobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermanaeromonas | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermanaerovibrio | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermincola | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermoanaerobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermoanaerobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermobaculum | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermobifida | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermobispora | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermocrinis | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermodesulfatator | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermodesulfobacterium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermodesulfobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermodesulfovibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermofilum | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermogutta | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 |
| Thermomicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermomonospora | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermoplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermoproteus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermosediminibacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Thermosipho | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermosulfidibacter | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Thermosynechococcus | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermotoga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermovibrio | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |
| Thermovirga | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thermus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thioalkalivibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thiobacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thioclava | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thiocystis | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Thioflavicoccus | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 |
| Thiohalobacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Thiolapillus | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Thiomicrospira | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thiomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Thioploca | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tistrella | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tolumonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Treponema | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Trichodesmium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Trichormus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tropheryma | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 |
| Truepera | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Trueperella | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Tsukamurella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Turicibacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Turneriella | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Ureaplasma | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Vagococcus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Variibacter | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 |
| Variovorax | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Veillonella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Verminephrobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Verrucomicrobium | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Vibrio | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Virgibacillus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Vitreoscilla | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Vogesella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Vulcanisaeta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Vulgatibacter | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| Waddlia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Weeksella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Weissella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Wenyingzhuangia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Wenzhouxiangella | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 0 |
| Wigglesworthia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Winogradskyella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Woeseia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Wolbachia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Wolinella | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 0 |
| Xanthobacter | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Xanthomonas | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Xenorhabdus | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Xylanimonas | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Xylella | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Yersinia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Yoonia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Zhihengliuella | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 |
| Zhongshania | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Zobellella | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Zobellia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Zunongwangia | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Zymobacter | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 |
| Zymomonas | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
This table shows the number of differentially abundant taxa between body sites as determined by ANCOM tests with different significance thresholds.
py$ancom_df %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Comparison | Shared genera | ANCOM 0.6 | ANCOM 0.7 | ANCOM 0.8 | ANCOM 0.9 |
|---|---|---|---|---|---|
| Blood vs saliva | 1054 | 74 | 74 | 0 | 0 |
| Blood vs buccal | 1054 | 66 | 66 | 0 | 0 |
| Saliva vs buccal | 1054 | 0 | 0 | 0 | 0 |
| Saliva vs saliva_euk | 1054 | 0 | 0 | 0 | 0 |
| Buccal vs buccal_euk | 1054 | 6 | 6 | 6 | 0 |
| Blood vs saliva_euk | 1054 | 57 | 57 | 0 | 0 |
| Blood vs buccal_euk | 1054 | 28 | 28 | 0 | 0 |
| Saliva_euk vs buccal_euk | 1054 | 5 | 5 | 5 | 0 |
This is plotted using the ANCOM 0.7 results. I have removed all taxa that are present in all body sites. Yellow means present in all samples for that body site while purple means absent in all samples for that body site.
Note that the differences that we saw before with Klebsiella, Streptococcus and Prevotella are not present here as all of these are present in all samples.
new_new_db = new_db.rename(columns=site_dict)
keeping = []
taxa_new = list(new_new_db.index.values)
for tax in taxa_new:
this_row = new_new_db.loc[tax, :].values
if len(list(set(this_row))) == 1: keeping.append(False)
else: keeping.append(True)
new_new_db = new_new_db.loc[keeping, :]
plot_heatmap(new_new_db, ancom[2])
Function is profiled using:
1. HUMAnN2
- Uniref90 database
- Uniref50 database
2. HUMAnN3
- Uniref90 and Uniref50
We can see here that it’s a bit variable as to whether HUMAnN3 classifies more or less reads than either of the databases with HUMAnN2 (HUMAnN3 uses both Uniref50 and Uniref90). Also, with HUMAnN3, all of the samples with no taxonomic classifications with MetaPhlAn3 have 48-49% unaligned, which is much less than any of the other samples.
plt.figure(figsize=(8,6))
ax1 = plt.subplot(111)
x1 = [x for x in range(21)]
x2 = [x+0.3 for x in range(21)]
x3 = [x+0.6 for x in range(21)]
#x = [x+0.2 for x in range(21)]
ax1.bar(x1, reads.loc[:, 'unaligned_after_translation_uniref_90'].values, color=colors, edgecolor='k', width=0.3)
ax1.bar(x2, reads.loc[:, 'unaligned_after_translation_uniref_50'].values, color=colors, edgecolor='k', width=0.3, alpha=0.7)
ax1.bar(x3, reads.loc[:, 'unaligned_after_translation_humann3'].values, color=colors, edgecolor='k', width=0.3, alpha=0.4)
plt.xticks(x2, sample_names, rotation=90)
plt.ylabel('Unaligned reads after translation (%)')
plt.xlim([-0.5,21])
handles = [Patch(facecolor='k', edgecolor='k', label='HUMAnN2 Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.7, label='HUMAnN2 Uniref50'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='HUMAnN3')]
ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1.02))
plt.tight_layout()
plt.show()
After running HUMAnN2 and HUMAnN3, the pathabundance, pathcoverage and genefamilies tables are joined, and the pathabundance is renormalised (relative abundance calculated) and split to tables that are stratified by species or not. Here we use the unstratified pathabundance file (the MetaPhlAn2/MetaPhlAn3 species results don’t seem to have been particularly accurate). The genefamilies file is stratified by default so we take only the overall values for the gene family (removing all species results) and re-calculate these values as relative abundances.
pathways_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_pathabundance_relab_unstratified.tsv', header=0, index_col=0, sep='\t')
pathways_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_pathabundance_relab_unstratified.tsv', header=0, index_col=0, sep='\t')
pathways_h3 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_pathabundance_relab_unstratified.tsv', header=0, index_col=0, sep='\t')
def get_richness(df):
snames = list(df.columns)
num_genes = []
for sn in snames:
one_sample = pd.DataFrame(df.loc[:, sn])
one_sample = one_sample[one_sample.max(axis=1) > 0]
num_genes.append(one_sample.shape[0])
return snames, num_genes
def genes_filter(genes):
genes.drop('UNMAPPED', axis=0, inplace=True)
gene_names = list(genes.index.values)
keeping = []
for gn in gene_names:
new_gn = gn.split('|')[0]
if gn == new_gn: keeping.append(True)
else: keeping.append(False)
genes_abs = genes.loc[keeping, :]
genes = genes_abs.divide(genes_abs.sum(axis=0)).multiply(100)
return genes_abs, genes
genes_90_not_cpm = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
genes_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
genes_90_ko = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies_cpm_ko_90.tsv', header=0, index_col=0, sep='\t')
genes_90_not_cpm = genes_filter(genes_90_not_cpm)[1]
genes_90 = genes_filter(genes_90)[1]
genes_90_ko = genes_filter(genes_90_ko)[1]
snames_genes_90, genes_90_richness = get_richness(genes_90)
snames_genes_90_ko, genes_90_richness_ko = get_richness(genes_90_ko)
snames_pways_90, pways_90_richness = get_richness(pathways_90)
genes_50_not_cpm = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
genes_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
genes_50_ko = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies_cpm_ko_50.tsv', header=0, index_col=0, sep='\t')
genes_50_not_cpm = genes_filter(genes_50_not_cpm)[1]
genes_50 = genes_filter(genes_50)[1]
genes_50_ko = genes_filter(genes_50_ko)[1]
snames_genes_50, genes_50_richness = get_richness(genes_50)
snames_genes_50_ko, genes_50_richness_ko = get_richness(genes_50_ko)
snames_pways_50, pways_50_richness = get_richness(pathways_50)
genes_h3_not_cpm = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies.tsv', header=0, index_col=0, sep='\t')
genes_h3 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
genes_h3_ko_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm_ko_50.tsv', header=0, index_col=0, sep='\t')
genes_h3_ko_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm_ko_90.tsv', header=0, index_col=0, sep='\t')
genes_h3_not_cpm = genes_filter(genes_h3_not_cpm)[1]
genes_h3 = genes_filter(genes_h3)[1]
genes_h3_ko_50 = genes_filter(genes_h3_ko_50)[1]
genes_h3_ko_90 = genes_filter(genes_h3_ko_90)[1]
snames_genes_h3, genes_h3_richness = get_richness(genes_h3)
snames_genes_h3_ko_50, genes_h3_richness_ko_50 = get_richness(genes_h3_ko_50)
snames_genes_h3_ko_90, genes_h3_richness_ko_90 = get_richness(genes_h3_ko_90)
snames_pways_h3, pways_h3_richness = get_richness(pathways_h3)
Here I’ve used the built-in HUMAnN databases to map the Uniref gene families to KEGG orthologs, and this is a comparison of the proportion of reads that are ungrouped by using the mapping tables on the HUMAnN2 Uniref90 and Uniref50 databases and then the HUMAnN2 mapping tables separately on the same output for the Uniref90 and UNiref50 (even though these are combined in the HUMAnN2 output).
plt.figure(figsize=(8,6))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)
x = [a for a in range(len(genes_50_ko.columns))]
x1 = [a+0.4 for a in x]
ax1.bar(x, genes_90_ko.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k')
ax1.bar(x1, genes_50_ko.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k', alpha=0.6)
ax2.bar(x, genes_h3_ko_90.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k')
ax2.bar(x1, genes_h3_ko_50.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k', alpha=0.6)
plt.sca(ax1)
plt.xlim([-0.5,21]), plt.xticks([])
handles = [Patch(facecolor='k', edgecolor='k', label='Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.6, label='Uniref50')]
ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1.02))
plt.ylabel('Ungrouped\nKEGG orthologs (%)')
plt.ylim([0,100])
plt.sca(ax2)
plt.xlim([-0.5,21])
plt.ylim([0,100])
plt.ylabel('Ungrouped\nKEGG orthologs (%)')
plt.xticks(x1, sample_names, rotation=90)
plt.tight_layout()
plt.show()
Given how small the proportion of the Uniref gene families that have mapped to KEGG orthologs is, we will continue with the Uniref gene families, and we use the tables that have been normalised within HUMAnN to CPM.
# genes_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
# genes_50.drop('UNMAPPED', axis=0, inplace=True)
# gene_names = list(genes_50.index.values)
# keeping = []
# for gn in gene_names:
# new_gn = gn.split('|')[0]
# if gn == new_gn: keeping.append(True)
# else: keeping.append(False)
# genes_50_abs = genes_50.loc[keeping, :]
# genes_50 = genes_50_abs.divide(genes_50_abs.sum(axis=0)).multiply(100)
#
# snames_genes_50, genes_50_richness = get_richness(genes_50)
# snames_pways_50, pways_50_richness = get_richness(pathways_50)
#
# genes_h3 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
# genes_h3.drop('UNMAPPED', axis=0, inplace=True)
# gene_names = list(genes_h3.index.values)
# keeping = []
# for gn in gene_names:
# new_gn = gn.split('|')[0]
# if gn == new_gn: keeping.append(True)
# else: keeping.append(False)
# genes_abs = genes_h3.loc[keeping, :]
# genes_h3 = genes_abs.divide(genes_abs.sum(axis=0)).multiply(100)
#
# snames_genes, genes_richness = get_richness(genes_h3)
# snames_pways, pways_richness = get_richness(pathways)
# genes_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
# genes_90.drop('UNMAPPED', axis=0, inplace=True)
# gene_names = list(genes_90.index.values)
# keeping = []
# for gn in gene_names:
# new_gn = gn.split('|')[0]
# if gn == new_gn: keeping.append(True)
# else: keeping.append(False)
# genes_90_abs = genes_90.loc[keeping, :]
# genes_90 = genes_90_abs.divide(genes_90_abs.sum(axis=0)).multiply(100)
plt.figure(figsize=(8,6))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)
x = [a for a in range(len(genes_90.columns))]
x1 = [a+0.3 for a in x]
x2 = [a+0.6 for a in x]
ax1.bar(x, genes_90_richness, width=0.3, color=colors, edgecolor='k')
ax2.bar(x, genes_90_richness, width=0.3, color=colors, edgecolor='k')
ax1.bar(x1[:len(genes_50_richness)], genes_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax2.bar(x1[:len(genes_50_richness)], genes_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax1.bar(x2, genes_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
ax2.bar(x2, genes_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
plt.sca(ax1)
plt.xlim([-0.5,21]), plt.xticks([])
handles = [Patch(facecolor='k', edgecolor='k', label='HUMAnN2 Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.7, label='HUMAnN2 Uniref50'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='HUMAnN3')]
ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1.02))
plt.ylabel('Gene family\nrichness')
plt.sca(ax2)
plt.semilogy()
plt.xlim([-0.5,21])
plt.ylabel('Gene family\nrichness (log scale)')
plt.xticks(x1, sample_names, rotation=90)
plt.tight_layout()
plt.show()
We can see here that the number of gene families is lower for HUMAnN3 than for either of the HUMAnN2 runs, aside from for the blood samples where we have higher richness for HUMAnN3 than Uniref90, but lower than Uniref50.
plt.figure(figsize=(8,6))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)
x = [a for a in range(len(genes_90.columns))]
x1 = [a+0.3 for a in x]
x2 = [a+0.6 for a in x]
ax1.bar(x, pways_90_richness, width=0.3, color=colors, edgecolor='k')
ax1.bar(x1, pways_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax2.bar(x, pways_90_richness, width=0.3, color=colors, edgecolor='k')
ax2.bar(x1, pways_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax1.bar(x2, pways_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
ax2.bar(x2, pways_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
plt.sca(ax1)
plt.xticks([])
plt.ylabel('Pathway richness')
handles = [Patch(facecolor='k', edgecolor='k', label='HUMAnN2 Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.7, label='HUMAnN2 Uniref50'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='HUMAnN3')]
ax1.legend(handles=handles, bbox_to_anchor=(1,1))
plt.xlim([-0.5,21])
plt.sca(ax2)
plt.semilogy()
plt.xlim([-0.5,21]), plt.xticks(x1, sample_names, rotation=90)
plt.ylabel('Pathway richness (log scale)')
plt.tight_layout()
plt.show()
Here we can see that HUMAnN3 gives us no pathways for the blood samples, and - as with the gene families - HUMAnN3 has lower numbers of pathways than either of the HUMAnN2 runs for all other samples.
plt.figure(figsize=(8,6))
ax1, ax2, ax3 = plt.subplot(221), plt.subplot(222), plt.subplot(223)
genes_50_t = genes_50.transpose()
pos_50, npos_50, nmds_stress_50 = transform_for_NMDS(genes_50_t)
genes_90_t = genes_90.transpose()
pos_90, npos_90, nmds_stress_90 = transform_for_NMDS(genes_90_t)
genes_h3_t = genes_h3.transpose()
pos_h3, npos_h3, nmds_stress_h3 = transform_for_NMDS(genes_h3_t)
for a in range(len(npos_50)):
ax1.scatter(npos_50[a,0], npos_50[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(npos_90[a,0], npos_90[a,1], marker=shapes[a], color=colors[a], s=100)
ax3.scatter(npos_h3[a,0], npos_h3[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1'), ax2.set_xlabel('nMDS1'), ax3.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2'), ax2.set_ylabel('nMDS2'), ax3.set_ylabel('nMDS2')
ax1.set_title('HUMAnN2 Uniref50'), ax2.set_title('HUMAnN2 Uniref90'), ax3.set_title('HUMAnN3')
handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]
plt.tight_layout()
ax3.legend(handles=handles1+handles2, loc='upper left', bbox_to_anchor=(1,1.02))
plt.show()
plt.figure(figsize=(8,6))
ax1, ax2, ax3, ax4 = plt.subplot(221), plt.subplot(222), plt.subplot(223), plt.subplot(224)
genes_50_ko_t = genes_50_ko.transpose()
pos_50_ko, npos_50_ko, nmds_stress_50_ko = transform_for_NMDS(genes_50_ko_t)
genes_90_ko_t = genes_90_ko.transpose()
pos_90_ko, npos_90_ko, nmds_stress_90_ko = transform_for_NMDS(genes_90_ko_t)
genes_h3_ko_50_t = genes_h3_ko_50.transpose()
pos_h3_ko_50, npos_h3_ko_50, nmds_stress_h3_ko_50 = transform_for_NMDS(genes_h3_ko_50_t)
genes_h3_ko_90_t = genes_h3_ko_90.transpose()
pos_h3_ko_90, npos_h3_ko_90, nmds_stress_h3_ko_90 = transform_for_NMDS(genes_h3_ko_90_t)
for a in range(len(npos_50)):
ax1.scatter(npos_50_ko[a,0], npos_50_ko[a,1], marker=shapes[a], color=colors[a], s=100)
ax2.scatter(npos_90_ko[a,0], npos_90_ko[a,1], marker=shapes[a], color=colors[a], s=100)
ax3.scatter(npos_h3_ko_50[a,0], npos_h3_ko_50[a,1], marker=shapes[a], color=colors[a], s=100)
ax4.scatter(npos_h3_ko_90[a,0], npos_h3_ko_90[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1'), ax2.set_xlabel('nMDS1'), ax3.set_xlabel('nMDS1'), ax4.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2'), ax2.set_ylabel('nMDS2'), ax3.set_ylabel('nMDS2'), ax4.set_ylabel('nMDS2')
ax1.set_title('HUMAnN2 Uniref50'), ax2.set_title('HUMAnN2 Uniref90'), ax3.set_title('HUMAnN3 Uniref50'), ax4.set_title('HUMAnN3 Uniref90'),
handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]
ax4.legend(handles=handles1+handles2, loc='upper left', bbox_to_anchor=(1,1.02))
plt.tight_layout()
plt.show()
Now we are looking at the gene families, pathways and KEGG orthologs that are differentially abundant between body sites, using the lists that are output by HUMAnN2.
def get_diff_abundant(genes, name_csv, fp= '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/'):
col_names = list(genes.columns)
col_names_dict = {}
for cn in col_names:
if 'SRR8595488' in cn:
genes.drop([cn], axis=1, inplace=True)
cn_new = cn.split('_')[0]
col_names_dict[cn] = site_dict[cn_new]
new_genes = genes.rename(columns=col_names_dict)
comparisons = [['Saliva', 'Blood'], ['Buccal', 'Blood'], ['Saliva', 'Buccal'], ['Saliva', 'Saliva_euk'], ['Buccal', 'Buccal_euk'], ['Blood', 'Saliva_euk'], ['Blood', 'Buccal_euk']]
fig = plt.figure(figsize=(15,8))
ax = [plt.subplot2grid((2,8), (0,1), colspan=2), plt.subplot2grid((2,8), (0,3), colspan=2), plt.subplot2grid((2,8), (0,5), colspan=2), plt.subplot2grid((2,8), (1,0), colspan=2), plt.subplot2grid((2,8), (1,2), colspan=2), plt.subplot2grid((2,8), (1,4), colspan=2), plt.subplot2grid((2,8), (1,6), colspan=2)]
for a in range(len(comparisons)):
ax[a].set_title(comparisons[a][0]+r' $vs$ '+comparisons[a][1])
this_comparison = new_genes.loc[:, comparisons[a]]
this_comparison = this_comparison[this_comparison.max(axis=1) > 0]
genes_comp = list(this_comparison.index.values)
fc, pval, colors_p, this_fc = [], [], [], []
for b in range(len(genes_comp)):
c1, c2 = list(this_comparison.loc[genes_comp[b], comparisons[a][0]]), list(this_comparison.loc[genes_comp[b], comparisons[a][1]])
c1, c2 = [0.000001 if x == 0 else x for x in c1], [0.000001 if x == 0 else x for x in c2]
c1, c2 = [math.log2(c1[c]) for c in range(len(c1))], [math.log2(c2[c]) for c in range(len(c2))]
t, p = stats.ttest_ind(c1, c2)
c1, c2 = np.median(c1), np.median(c2)
diff = c2-c1
if diff < 1 and diff > 0: diff = -(1/diff)
fc.append(diff), pval.append(p)
this_fc.append([genes_comp[b], diff, p])
colors_p_all = [colors_dict[comparisons[a][0]], colors_dict[comparisons[a][1]]]
for c in range(len(fc)):
if fc[c] >= 2 and pval[c] <= 0.01: colors_p.append(colors_p_all[1])
elif fc[c] <= -2 and pval[c] <= 0.01: colors_p.append(colors_p_all[0])
else: colors_p.append('#B1B1B1')
pval[c] = -math.log10(pval[c])
com = comparisons[a][0]+'_'+comparisons[a][1]
if a == 0:
df_pval = pd.DataFrame(this_fc, columns=['Genes/pathways', com+'_FC', com+'_p'])
df_pval.set_index('Genes/pathways', inplace=True)
else:
new_df = pd.DataFrame(this_fc, columns=['Genes/pathways', com+'_FC', com+'_p'])
new_df.set_index('Genes/pathways', inplace=True)
df_pval = pd.concat([df_pval, new_df], axis=1, join='outer')
ma = max([max(fc), abs(min(fc))])+0.5
if ma > 10: ma = 10.5
ax[a].set_xlim([-ma, ma])
ax[a].scatter(fc, pval, marker='o', c=colors_p, s=10)
ax[a].set_xlabel(r'Log$_2$ fold change')
if a == 0 or a == 3:
ax[a].set_ylabel('-log(p value)')
plt.sca(ax[a])
df_pval.to_csv(fp+name_csv+'.csv')
handles = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
ax[2].legend(handles=handles, bbox_to_anchor=(1.04,1), loc='upper left')
return
Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(pathways_90)
get_diff_abundant(pways, 'pathways_90')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(pathways_50)
get_diff_abundant(pways, 'pathways_50')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant gene families between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(genes_90)
get_diff_abundant(pways, 'genes_90')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant gene families between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(genes_50)
get_diff_abundant(pways, 'genes_50')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant gene families between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(genes_90_ko)
get_diff_abundant(pways, 'genes_90_ko')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant KEGG orthologs between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(genes_50_ko)
get_diff_abundant(pways, 'genes_50_ko')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Now we are looking at the gene families and pathways that are differentially abundant between body sites, using the lists that are output by HUMAnN3.
Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(pathways_h3)
get_diff_abundant(pways, 'pathways', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
pways = pd.DataFrame(genes_h3)
get_diff_abundant(pways, 'genes', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
Note that now we have a separation of Uniref90 and Uniref50 because this is what the mapping files do. I’m not sure yet if it’s appropriate to just combine these two tables or now.
pways = pd.DataFrame(genes_h3_ko_90)
get_diff_abundant(pways, 'genes_ko_90', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.
Note that now we have a separation of Uniref90 and Uniref50 because this is what the mapping files do. I’m not sure yet if it’s appropriate to just combine these two tables or now.
pways = pd.DataFrame(genes_h3_ko_50)
get_diff_abundant(pways, 'genes_ko_50', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
For each of these, I am initially filtering out to only P values that are below 0.01, and then am taking only the most up- or down-regulated pathways (50 of each) and printing them to a table.
For HUMAnN2 I am now looking at the pathways assigned to the Uniref50 database.
pathways = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/pathways_50.csv', header=0, index_col=0)
Negative fold changes are higher in saliva samples while positive are higher in blood samples.
bs = pathways.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs = bs[bs['Saliva_Blood_p'] < 0.01]
bs = bs.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs = pd.concat([bs.head(n=50), bs.tail(n=50)])
bs.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-6748: nitrate reduction VII (denitrification) | 3.353935 | 0.0050250 |
| PWY66-388: fatty acid α-oxidation III | 2.359026 | 0.0008840 |
| PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) | 2.298469 | 0.0007984 |
| PWY-7090: UDP-2,3-diacetamido-2,3-dideoxy-α-D-mannuronate biosynthesis | 1.992666 | 0.0000075 |
| LIPASYN-PWY: phospholipases | 1.121379 | 0.0027953 |
| PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine) | 1.064014 | 0.0000213 |
| PWY-7433: mucin core 1 and core 2 O-glycosylation | -1.003870 | 0.0030829 |
| URSIN-PWY: ureide biosynthesis | -1.125074 | 0.0005477 |
| PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) | -1.177346 | 0.0000897 |
| THREOCAT-PWY: superpathway of L-threonine metabolism | -1.222813 | 0.0000033 |
| RHAMCAT-PWY: L-rhamnose degradation I | -1.225334 | 0.0065949 |
| FUC-RHAMCAT-PWY: superpathway of fucose and rhamnose degradation | -1.462906 | 0.0028178 |
| GALACTUROCAT-PWY: D-galacturonate degradation I | -1.599911 | 0.0093617 |
| PWY-6344: L-ornithine degradation II (Stickland reaction) | -1.650884 | 0.0001819 |
| PWY-7039: phosphatidate metabolism, as a signaling molecule | -1.752074 | 0.0017477 |
| ARGININE-SYN4-PWY: L-ornithine de novo biosynthesis | -1.803634 | 0.0004227 |
| PWY-622: starch biosynthesis | -1.830364 | 0.0060239 |
| PWY-5392: reductive TCA cycle II | -1.871932 | 0.0011918 |
| PWY-2201: folate transformations I | -1.900549 | 0.0000170 |
| P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) | -1.921687 | 0.0073514 |
| FUCCAT-PWY: fucose degradation | -1.968721 | 0.0046833 |
| PWY-6630: superpathway of L-tyrosine biosynthesis | -2.047251 | 0.0001833 |
| PWY-6590: superpathway of Clostridium acetobutylicum acidogenic fermentation | -2.052614 | 0.0068845 |
| PWY0-42: 2-methylcitrate cycle I | -2.058113 | 0.0037055 |
| PWY-6823: molybdenum cofactor biosynthesis | -2.141908 | 0.0003348 |
| PWY-5747: 2-methylcitrate cycle II | -2.206359 | 0.0027051 |
| TCA-GLYOX-BYPASS: superpathway of glyoxylate bypass and TCA | -2.265081 | 0.0006474 |
| PWY-5177: glutaryl-CoA degradation | -2.330080 | 0.0006336 |
| PWY-6708: ubiquinol-8 biosynthesis (prokaryotic) | -2.343357 | 0.0090424 |
| PWY-5857: ubiquinol-10 biosynthesis (prokaryotic) | -2.343357 | 0.0090424 |
| PWY-5856: ubiquinol-9 biosynthesis (prokaryotic) | -2.343357 | 0.0090424 |
| PWY-5855: ubiquinol-7 biosynthesis (prokaryotic) | -2.343357 | 0.0090424 |
| DENITRIFICATION-PWY: nitrate reduction I (denitrification) | -2.395529 | 0.0000040 |
| PWY-6749: CMP-legionaminate biosynthesis I | -2.448271 | 0.0020433 |
| P162-PWY: L-glutamate degradation V (via hydroxyglutarate) | -2.467260 | 0.0044743 |
| CITRULBIO-PWY: L-citrulline biosynthesis | -2.523788 | 0.0034968 |
| PWY-821: superpathway of sulfur amino acid biosynthesis (Saccharomyces cerevisiae) | -2.714446 | 0.0013771 |
| COBALSYN-PWY: adenosylcobalamin salvage from cobinamide I | -2.748948 | 0.0011485 |
| PWY-6891: thiazole biosynthesis II (Bacillus) | -2.765335 | 0.0006256 |
| PWY-7003: glycerol degradation to butanol | -2.827942 | 0.0099335 |
| PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II | -2.884515 | 0.0032543 |
| PWY-5104: L-isoleucine biosynthesis IV | -2.983969 | 0.0048673 |
| PWY-5384: sucrose degradation IV (sucrose phosphorylase) | -2.985100 | 0.0001422 |
| PWY-5088: L-glutamate degradation VIII (to propanoate) | -3.049735 | 0.0034401 |
| PWY-6892: thiazole biosynthesis I (E. coli) | -3.057563 | 0.0003621 |
| PWY-6263: superpathway of menaquinol-8 biosynthesis II | -3.136512 | 0.0009763 |
| PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV | -3.140996 | 0.0069799 |
| P164-PWY: purine nucleobases degradation I (anaerobic) | -3.185865 | 0.0007588 |
| GLYCOLYSIS-TCA-GLYOX-BYPASS: superpathway of glycolysis, pyruvate dehydrogenase, TCA, and glyoxylate bypass | -3.194745 | 0.0000566 |
| PWY-6549: L-glutamine biosynthesis III | -3.214460 | 0.0000285 |
| ARO-PWY: chorismate biosynthesis I | -6.932112 | 0.0000010 |
| ANAEROFRUCAT-PWY: homolactic fermentation | -6.944961 | 0.0000022 |
| PWY-5103: L-isoleucine biosynthesis III | -6.947366 | 0.0000008 |
| COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis | -6.951019 | 0.0000009 |
| BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis | -6.974922 | 0.0000009 |
| CALVIN-PWY: Calvin-Benson-Bassham cycle | -6.977160 | 0.0000012 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | -7.010723 | 0.0000002 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | -7.010723 | 0.0000002 |
| PWY-5097: L-lysine biosynthesis VI | -7.026101 | 0.0000009 |
| PWY-7539: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis III (Chlamydia) | -7.030599 | 0.0000066 |
| DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I | -7.038235 | 0.0000013 |
| PWY-6163: chorismate biosynthesis from 3-dehydroquinate | -7.045429 | 0.0000015 |
| PWY-7208: superpathway of pyrimidine nucleobases salvage | -7.046760 | 0.0000005 |
| DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II | -7.048013 | 0.0000007 |
| VALSYN-PWY: L-valine biosynthesis | -7.080156 | 0.0000007 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | -7.080156 | 0.0000007 |
| PWY-7111: pyruvate fermentation to isobutanol (engineered) | -7.085083 | 0.0000005 |
| PWY-2942: L-lysine biosynthesis III | -7.099831 | 0.0000010 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | -7.100485 | 0.0000017 |
| PWY-5100: pyruvate fermentation to acetate and lactate II | -7.112480 | 0.0000013 |
| TRNA-CHARGING-PWY: tRNA charging | -7.114433 | 0.0000019 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | -7.115824 | 0.0000005 |
| NONMEVIPP-PWY: methylerythritol phosphate pathway I | -7.145606 | 0.0000027 |
| PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I | -7.148419 | 0.0000004 |
| PWY-841: superpathway of purine nucleotides de novo biosynthesis I | -7.160267 | 0.0000007 |
| PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I | -7.177065 | 0.0000006 |
| PWY-3841: folate transformations II | -7.183704 | 0.0000020 |
| PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I | -7.195526 | 0.0000039 |
| PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis | -7.203456 | 0.0000003 |
| PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II | -7.203456 | 0.0000003 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | -7.209498 | 0.0000005 |
| PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) | -7.212900 | 0.0000006 |
| PWY-6385: peptidoglycan biosynthesis III (mycobacteria) | -7.219420 | 0.0000023 |
| COA-PWY-1: coenzyme A biosynthesis II (mammalian) | -7.223528 | 0.0000009 |
| 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis | -7.229362 | 0.0000017 |
| PWY-5686: UMP biosynthesis | -7.260887 | 0.0000012 |
| GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) | -7.272773 | 0.0000014 |
| PWY-5695: urate biosynthesis/inosine 5’-phosphate degradation | -7.277619 | 0.0000019 |
| PWY66-400: glycolysis VI (metazoan) | -7.313957 | 0.0000012 |
| PWY-6151: S-adenosyl-L-methionine cycle I | -7.319377 | 0.0000015 |
| ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) | -7.328932 | 0.0000009 |
| PWY-6700: queuosine biosynthesis | -7.332738 | 0.0000019 |
| PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II | -7.376218 | 0.0000004 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | -7.378509 | 0.0000010 |
| PWY-1042: glycolysis IV (plant cytosol) | -7.447206 | 0.0000013 |
| PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) | -7.494650 | 0.0000013 |
| PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) | -7.502355 | 0.0000012 |
| PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) | -7.507127 | 0.0000010 |
| PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I | -7.541532 | 0.0000004 |
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | -7.794523 | 0.0000008 |
Negative fold changes are higher in buccal samples while positive are higher in blood samples.
bb = pathways.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb = bb[bb['Buccal_Blood_p'] < 0.01]
bb= bb.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb = pd.concat([bb.head(n=50), bb.tail(n=50)])
bb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-3781: aerobic respiration I (cytochrome c) | 1.810779 | 0.0033223 |
| PWY-7007: methyl ketone biosynthesis | 1.424266 | 0.0000012 |
| GLYCOLYSIS-TCA-GLYOX-BYPASS: superpathway of glycolysis, pyruvate dehydrogenase, TCA, and glyoxylate bypass | -1.395076 | 0.0088227 |
| SO4ASSIM-PWY: sulfate reduction I (assimilatory) | -1.614670 | 0.0004843 |
| PWY-6969: TCA cycle V (2-oxoglutarate:ferredoxin oxidoreductase) | -1.750007 | 0.0070212 |
| PWY-5840: superpathway of menaquinol-7 biosynthesis | -1.808867 | 0.0031228 |
| PWY-7196: superpathway of pyrimidine ribonucleosides salvage | -1.853108 | 0.0030499 |
| P108-PWY: pyruvate fermentation to propanoate I | -1.853532 | 0.0005303 |
| HISDEG-PWY: L-histidine degradation I | -1.919521 | 0.0004811 |
| PWY-5920: superpathway of heme biosynthesis from glycine | -2.061224 | 0.0000282 |
| GLYCOCAT-PWY: glycogen degradation I (bacterial) | -2.074745 | 0.0003171 |
| TCA: TCA cycle I (prokaryotic) | -2.141814 | 0.0014856 |
| PWY-5690: TCA cycle II (plants and fungi) | -2.183025 | 0.0006881 |
| PWY-6901: superpathway of glucose and xylose degradation | -2.187600 | 0.0019796 |
| SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis | -2.301408 | 0.0000383 |
| NAGLIPASYN-PWY: lipid IVA biosynthesis | -2.311088 | 0.0018788 |
| P125-PWY: superpathway of (R,R)-butanediol biosynthesis | -2.337138 | 0.0047167 |
| PWY-6628: superpathway of L-phenylalanine biosynthesis | -2.354140 | 0.0019767 |
| PWY66-398: TCA cycle III (animals) | -2.390681 | 0.0002442 |
| GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff | -2.396497 | 0.0001922 |
| PYRIDOXSYN-PWY: pyridoxal 5’-phosphate biosynthesis I | -2.518405 | 0.0004261 |
| PWY-7013: L-1,2-propanediol degradation | -2.568330 | 0.0056055 |
| PWY-7115: C4 photosynthetic carbon assimilation cycle, NAD-ME type | -2.699176 | 0.0029609 |
| PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I | -2.717392 | 0.0002948 |
| PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) | -2.744253 | 0.0000224 |
| HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) | -2.745087 | 0.0000196 |
| PWY-7211: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis | -2.752876 | 0.0000002 |
| PWY0-1061: superpathway of L-alanine biosynthesis | -2.788405 | 0.0028952 |
| ARGORNPROST-PWY: arginine, ornithine and proline interconversion | -2.811587 | 0.0001781 |
| PWY-6383: mono-trans, poly-cis decaprenyl phosphate biosynthesis | -2.812851 | 0.0008988 |
| PWY-5863: superpathway of phylloquinol biosynthesis | -2.856290 | 0.0000619 |
| P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle) | -2.861928 | 0.0053614 |
| PWY66-389: phytol degradation | -2.924220 | 0.0001742 |
| PWY-4981: L-proline biosynthesis II (from arginine) | -2.925803 | 0.0000654 |
| PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle | -2.936548 | 0.0006690 |
| PWY-5384: sucrose degradation IV (sucrose phosphorylase) | -2.955020 | 0.0028961 |
| PWY-5791: 1,4-dihydroxy-2-naphthoate biosynthesis II (plants) | -2.955055 | 0.0001177 |
| PWY-5837: 1,4-dihydroxy-2-naphthoate biosynthesis I | -2.955055 | 0.0001177 |
| P122-PWY: heterolactic fermentation | -3.030378 | 0.0041709 |
| PWY0-845: superpathway of pyridoxal 5’-phosphate biosynthesis and salvage | -3.046322 | 0.0001500 |
| PWY-5189: tetrapyrrole biosynthesis II (from glycine) | -3.122875 | 0.0002898 |
| PWY-5918: superpathay of heme biosynthesis from glutamate | -3.146932 | 0.0000004 |
| GLUTORN-PWY: L-ornithine biosynthesis | -3.237653 | 0.0014052 |
| PWY-5154: L-arginine biosynthesis III (via N-acetyl-L-citrulline) | -3.244932 | 0.0020478 |
| ARG+POLYAMINE-SYN: superpathway of arginine and polyamine biosynthesis | -3.264731 | 0.0037416 |
| PWY-6897: thiamin salvage II | -3.270383 | 0.0001049 |
| PWY-6527: stachyose degradation | -3.270419 | 0.0004801 |
| PWY-5989: stearate biosynthesis II (bacteria and plants) | -3.274229 | 0.0001892 |
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | -3.281298 | 0.0000000 |
| P461-PWY: hexitol fermentation to lactate, formate, ethanol and acetate | -3.317485 | 0.0019796 |
| PWY66-422: D-galactose degradation V (Leloir pathway) | -5.533690 | 0.0030667 |
| PWY-6317: galactose degradation I (Leloir pathway) | -5.533690 | 0.0040000 |
| PWY0-1296: purine ribonucleosides degradation | -5.534295 | 0.0001734 |
| PWY-2942: L-lysine biosynthesis III | -5.565221 | 0.0000109 |
| LACTOSECAT-PWY: lactose and galactose degradation I | -5.567233 | 0.0025906 |
| PWY-5686: UMP biosynthesis | -5.574031 | 0.0000801 |
| ARO-PWY: chorismate biosynthesis I | -5.588917 | 0.0001164 |
| THRESYN-PWY: superpathway of L-threonine biosynthesis | -5.594116 | 0.0000734 |
| PWY-7199: pyrimidine deoxyribonucleosides salvage | -5.595006 | 0.0048719 |
| PWY-3841: folate transformations II | -5.628953 | 0.0000703 |
| UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I | -5.638122 | 0.0001103 |
| COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis | -5.642542 | 0.0000879 |
| COA-PWY-1: coenzyme A biosynthesis II (mammalian) | -5.669929 | 0.0000608 |
| PWY-841: superpathway of purine nucleotides de novo biosynthesis I | -5.670172 | 0.0000479 |
| PWY-6385: peptidoglycan biosynthesis III (mycobacteria) | -5.704447 | 0.0000689 |
| PWY-6700: queuosine biosynthesis | -5.705070 | 0.0000935 |
| PWY-3001: superpathway of L-isoleucine biosynthesis I | -5.706412 | 0.0000531 |
| PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) | -5.707573 | 0.0000435 |
| PWY66-400: glycolysis VI (metazoan) | -5.714766 | 0.0000467 |
| PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) | -5.721121 | 0.0000587 |
| PWY-6151: S-adenosyl-L-methionine cycle I | -5.725540 | 0.0000531 |
| PWY0-1319: CDP-diacylglycerol biosynthesis II | -5.756408 | 0.0001248 |
| PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) | -5.769150 | 0.0000592 |
| DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II | -5.776011 | 0.0000736 |
| PWY-6737: starch degradation V | -5.788743 | 0.0016341 |
| PWY-1042: glycolysis IV (plant cytosol) | -5.788916 | 0.0000241 |
| ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) | -5.791165 | 0.0000196 |
| PWY-5103: L-isoleucine biosynthesis III | -5.808592 | 0.0000335 |
| BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis | -5.809792 | 0.0000297 |
| PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II | -5.835531 | 0.0000828 |
| PWY-7197: pyrimidine deoxyribonucleotide phosphorylation | -5.853169 | 0.0000647 |
| PWY-7208: superpathway of pyrimidine nucleobases salvage | -5.879577 | 0.0000531 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | -5.930624 | 0.0000170 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | -5.954771 | 0.0000291 |
| PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) | -5.966907 | 0.0000549 |
| PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I | -5.981678 | 0.0000161 |
| PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I | -6.009680 | 0.0000506 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | -6.014350 | 0.0000246 |
| PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I | -6.034025 | 0.0000437 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | -6.034119 | 0.0000277 |
| VALSYN-PWY: L-valine biosynthesis | -6.034119 | 0.0000277 |
| PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II | -6.049958 | 0.0000323 |
| PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis | -6.049958 | 0.0000323 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | -6.101319 | 0.0000471 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | -6.101319 | 0.0000471 |
| PWY-7111: pyruvate fermentation to isobutanol (engineered) | -6.269354 | 0.0000386 |
| PWY-6609: adenine and adenosine salvage III | -6.345057 | 0.0006892 |
| PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II | -6.359716 | 0.0000637 |
| PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I | -6.446709 | 0.0000598 |
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | -6.623166 | 0.0000374 |
Negative fold changes are higher in saliva samples while positive are higher in buccal samples.
sb = pathways.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb = sb[sb['Saliva_Buccal_p'] < 0.01]
sb= sb.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb = pd.concat([sb.head(n=50), sb.tail(n=50)])
sb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY66-388: fatty acid α-oxidation III | 2.359026 | 0.0008840 |
| PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) | 2.298469 | 0.0018401 |
| PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine) | 1.303619 | 0.0015578 |
| LIPASYN-PWY: phospholipases | 1.121379 | 0.0027953 |
| PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) | -1.056819 | 0.0001084 |
| URSIN-PWY: ureide biosynthesis | -1.125074 | 0.0005477 |
| THREOCAT-PWY: superpathway of L-threonine metabolism | -1.222813 | 0.0000033 |
| GLYCOCAT-PWY: glycogen degradation I (bacterial) | -1.319259 | 0.0087915 |
| PWY0-781: aspartate superpathway | -1.450322 | 0.0032561 |
| FUC-RHAMCAT-PWY: superpathway of fucose and rhamnose degradation | -1.462906 | 0.0028178 |
| DAPLYSINESYN-PWY: L-lysine biosynthesis I | -1.475467 | 0.0050118 |
| ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) | -1.530598 | 0.0098398 |
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | -1.584070 | 0.0014933 |
| GALACTUROCAT-PWY: D-galacturonate degradation I | -1.599911 | 0.0093617 |
| PWY-6344: L-ornithine degradation II (Stickland reaction) | -1.650884 | 0.0001819 |
| RHAMCAT-PWY: L-rhamnose degradation I | -1.670477 | 0.0096442 |
| ARGORNPROST-PWY: arginine, ornithine and proline interconversion | -1.680905 | 0.0084875 |
| PWY-6859: all-trans-farnesol biosynthesis | -1.700749 | 0.0090943 |
| PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle | -1.740130 | 0.0054514 |
| PWY66-398: TCA cycle III (animals) | -1.800719 | 0.0020909 |
| PWY0-162: superpathway of pyrimidine ribonucleotides de novo biosynthesis | -1.807031 | 0.0063393 |
| PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) | -1.837946 | 0.0056222 |
| PWY4FS-8: phosphatidylglycerol biosynthesis II (non-plastidic) | -1.848239 | 0.0085834 |
| PWY-5392: reductive TCA cycle II | -1.871932 | 0.0034699 |
| PWY-5097: L-lysine biosynthesis VI | -1.872334 | 0.0050720 |
| ARGSYN-PWY: L-arginine biosynthesis I (via L-ornithine) | -1.915180 | 0.0076239 |
| GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) | -1.932810 | 0.0099337 |
| PWY-7400: L-arginine biosynthesis IV (archaebacteria) | -1.942100 | 0.0084078 |
| ASPASN-PWY: superpathway of L-aspartate and L-asparagine biosynthesis | -1.964838 | 0.0084397 |
| FUCCAT-PWY: fucose degradation | -1.968721 | 0.0085177 |
| PWY-5690: TCA cycle II (plants and fungi) | -1.999274 | 0.0020765 |
| PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) | -2.008031 | 0.0057161 |
| PWY-5667: CDP-diacylglycerol biosynthesis I | -2.010024 | 0.0034229 |
| SO4ASSIM-PWY: sulfate reduction I (assimilatory) | -2.026512 | 0.0067341 |
| PWY-7007: methyl ketone biosynthesis | -2.037466 | 0.0026320 |
| PWY-6630: superpathway of L-tyrosine biosynthesis | -2.047251 | 0.0005294 |
| PWY-6590: superpathway of Clostridium acetobutylicum acidogenic fermentation | -2.052614 | 0.0074876 |
| TRNA-CHARGING-PWY: tRNA charging | -2.052633 | 0.0086581 |
| PWY0-42: 2-methylcitrate cycle I | -2.058113 | 0.0033138 |
| PWY0-862: (5Z)-dodec-5-enoate biosynthesis | -2.069680 | 0.0045255 |
| PWY-7392: taxadiene biosynthesis (engineered) | -2.154615 | 0.0030503 |
| PWY-5121: superpathway of geranylgeranyl diphosphate biosynthesis II (via MEP) | -2.158286 | 0.0027898 |
| DENITRIFICATION-PWY: nitrate reduction I (denitrification) | -2.163812 | 0.0020431 |
| SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis | -2.196655 | 0.0042274 |
| PWY-5747: 2-methylcitrate cycle II | -2.206359 | 0.0024559 |
| PWY-5188: tetrapyrrole biosynthesis I (from glutamate) | -2.244322 | 0.0025818 |
| SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I | -2.259807 | 0.0022405 |
| PWY-7664: oleate biosynthesis IV (anaerobic) | -2.265805 | 0.0024839 |
| PWY-5863: superpathway of phylloquinol biosynthesis | -2.267122 | 0.0011358 |
| PWY-7560: methylerythritol phosphate pathway II | -2.271288 | 0.0019152 |
| ANAEROFRUCAT-PWY: homolactic fermentation | -2.681518 | 0.0029751 |
| PWY-6305: putrescine biosynthesis IV | -2.692702 | 0.0029161 |
| PWY-6549: L-glutamine biosynthesis III | -2.697465 | 0.0006607 |
| PWY-821: superpathway of sulfur amino acid biosynthesis (Saccharomyces cerevisiae) | -2.714446 | 0.0013771 |
| PWY-7663: gondoate biosynthesis (anaerobic) | -2.724747 | 0.0007663 |
| P108-PWY: pyruvate fermentation to propanoate I | -2.742366 | 0.0014147 |
| PWY-6891: thiazole biosynthesis II (Bacillus) | -2.765335 | 0.0006256 |
| GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff | -2.774376 | 0.0019819 |
| PWY-5177: glutaryl-CoA degradation | -2.776678 | 0.0088468 |
| PWY-6897: thiamin salvage II | -2.793742 | 0.0036289 |
| RIBOSYN2-PWY: flavin biosynthesis I (bacteria and plants) | -2.794366 | 0.0027831 |
| HEMESYN2-PWY: heme biosynthesis II (anaerobic) | -2.807997 | 0.0001720 |
| TCA: TCA cycle I (prokaryotic) | -2.819134 | 0.0045786 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | -2.824266 | 0.0022239 |
| PWY-7003: glycerol degradation to butanol | -2.827942 | 0.0088582 |
| NONMEVIPP-PWY: methylerythritol phosphate pathway I | -2.830198 | 0.0018007 |
| PWY-5920: superpathway of heme biosynthesis from glycine | -2.850742 | 0.0003789 |
| PWY66-389: phytol degradation | -2.857682 | 0.0020576 |
| PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II | -2.884515 | 0.0029288 |
| PWY-5840: superpathway of menaquinol-7 biosynthesis | -2.893458 | 0.0042677 |
| PWY-6628: superpathway of L-phenylalanine biosynthesis | -2.905931 | 0.0023358 |
| HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) | -2.955786 | 0.0010630 |
| HISDEG-PWY: L-histidine degradation I | -2.959526 | 0.0057620 |
| PWY-5104: L-isoleucine biosynthesis IV | -2.983969 | 0.0072339 |
| PWY-5088: L-glutamate degradation VIII (to propanoate) | -3.049735 | 0.0034401 |
| PWY-6892: thiazole biosynthesis I (E. coli) | -3.057563 | 0.0006771 |
| PWY0-1415: superpathway of heme biosynthesis from uroporphyrinogen-III | -3.064061 | 0.0016315 |
| PANTO-PWY: phosphopantothenate biosynthesis I | -3.088549 | 0.0019909 |
| PWY-7254: TCA cycle VII (acetate-producers) | -3.091493 | 0.0019959 |
| PWY-6263: superpathway of menaquinol-8 biosynthesis II | -3.136512 | 0.0009392 |
| PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I | -3.136571 | 0.0017682 |
| REDCITCYC: TCA cycle VIII (helicobacter) | -3.177954 | 0.0008579 |
| PYRIDNUCSYN-PWY: NAD biosynthesis I (from aspartate) | -3.304996 | 0.0009613 |
| PYRIDNUCSAL-PWY: NAD salvage pathway I | -3.338072 | 0.0001963 |
| PWY-241: C4 photosynthetic carbon assimilation cycle, NADP-ME type | -3.356106 | 0.0030459 |
| PWY-6545: pyrimidine deoxyribonucleotides de novo biosynthesis III | -3.434542 | 0.0015424 |
| PWY-7389: superpathway of anaerobic energy metabolism (invertebrates) | -3.497806 | 0.0008188 |
| PWY-5101: L-isoleucine biosynthesis II | -3.534535 | 0.0037582 |
| PWY4LZ-257: superpathway of fermentation (Chlamydomonas reinhardtii) | -3.576991 | 0.0009879 |
| P23-PWY: reductive TCA cycle I | -3.688232 | 0.0007033 |
| PWY-7384: anaerobic energy metabolism (invertebrates, mitochondrial) | -3.881671 | 0.0019779 |
| PWY-6803: phosphatidylcholine acyl editing | -3.975240 | 0.0001431 |
| PWY-7323: superpathway of GDP-mannose-derived O-antigen building blocks biosynthesis | -4.007135 | 0.0018467 |
| P42-PWY: incomplete reductive TCA cycle | -4.266303 | 0.0005118 |
| THISYN-PWY: superpathway of thiamin diphosphate biosynthesis I | -4.416848 | 0.0005514 |
| COLANSYN-PWY: colanic acid building blocks biosynthesis | -4.530580 | 0.0015287 |
| PWY-6895: superpathway of thiamin diphosphate biosynthesis II | -4.705532 | 0.0000302 |
| PWY-5941: glycogen degradation II (eukaryotic) | -4.758591 | 0.0033992 |
| PWY-5030: L-histidine degradation III | -4.958765 | 0.0019046 |
| PWY-6992: 1,5-anhydrofructose degradation | -5.695308 | 0.0080382 |
Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.
ss = pathways.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss = ss[ss['Saliva_Saliva_euk_p'] < 0.01]
ss = ss.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss = pd.concat([ss.head(n=50), ss.tail(n=50)])
ss = ss.groupby(by=ss.index, axis=0).mean()
ss.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| URSIN-PWY: ureide biosynthesis | -1.125074 | 0.0005477 |
| PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine) | -1.153830 | 0.0080835 |
| FUC-RHAMCAT-PWY: superpathway of fucose and rhamnose degradation | -1.479367 | 0.0034393 |
| PWY-5392: reductive TCA cycle II | -1.593244 | 0.0050213 |
| PWY-6344: L-ornithine degradation II (Stickland reaction) | -1.650884 | 0.0001819 |
| PWY-622: starch biosynthesis | -1.830364 | 0.0068348 |
| PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) | -1.901485 | 0.0060668 |
| TCA-GLYOX-BYPASS: superpathway of glyoxylate bypass and TCA | -1.950438 | 0.0061700 |
| GLYCOLYSIS-TCA-GLYOX-BYPASS: superpathway of glycolysis, pyruvate dehydrogenase, TCA, and glyoxylate bypass | -2.017885 | 0.0079801 |
| P125-PWY: superpathway of (R,R)-butanediol biosynthesis | -2.029921 | 0.0061998 |
| PWY-6901: superpathway of glucose and xylose degradation | -2.336738 | 0.0083937 |
| DENITRIFICATION-PWY: nitrate reduction I (denitrification) | -2.410953 | 0.0030787 |
| PWY-5384: sucrose degradation IV (sucrose phosphorylase) | -2.497972 | 0.0063036 |
| PYRIDNUCSAL-PWY: NAD salvage pathway I | -3.338072 | 0.0001660 |
Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.
bbe = pathways.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe = bbe[bbe['Buccal_Buccal_euk_p'] < 0.01]
bbe = bbe.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe = pd.concat([bbe.head(n=50), bbe.tail(n=50)])
bbe = bbe.groupby(by=bbe.index, axis=0).mean()
bbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-7007: methyl ketone biosynthesis | 1.424266 | 0.0000012 |
| SO4ASSIM-PWY: sulfate reduction I (assimilatory) | -1.614670 | 0.0004649 |
| PWY-6969: TCA cycle V (2-oxoglutarate:ferredoxin oxidoreductase) | -1.750007 | 0.0070212 |
| PWY-5840: superpathway of menaquinol-7 biosynthesis | -1.808867 | 0.0031228 |
| PWY-7196: superpathway of pyrimidine ribonucleosides salvage | -1.853108 | 0.0030499 |
| P108-PWY: pyruvate fermentation to propanoate I | -1.853532 | 0.0010202 |
| HISDEG-PWY: L-histidine degradation I | -1.919521 | 0.0045971 |
| PWY-5920: superpathway of heme biosynthesis from glycine | -2.061224 | 0.0000282 |
| GLYCOCAT-PWY: glycogen degradation I (bacterial) | -2.074745 | 0.0003171 |
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | -2.121906 | 0.0059900 |
| PWY-6901: superpathway of glucose and xylose degradation | -2.187600 | 0.0019796 |
| SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis | -2.301408 | 0.0000380 |
| NAGLIPASYN-PWY: lipid IVA biosynthesis | -2.311088 | 0.0018788 |
| PWY-7663: gondoate biosynthesis (anaerobic) | -2.359405 | 0.0058709 |
| PWY66-398: TCA cycle III (animals) | -2.390681 | 0.0003488 |
| PWY-6383: mono-trans, poly-cis decaprenyl phosphate biosynthesis | -2.408960 | 0.0099503 |
| PYRIDOXSYN-PWY: pyridoxal 5’-phosphate biosynthesis I | -2.518405 | 0.0004261 |
| PWY66-389: phytol degradation | -2.557491 | 0.0008160 |
| PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I | -2.717392 | 0.0002948 |
| PWY-5189: tetrapyrrole biosynthesis II (from glycine) | -2.738729 | 0.0011017 |
| PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) | -2.744253 | 0.0003297 |
| PWY-7211: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis | -2.752876 | 0.0000002 |
| ARGORNPROST-PWY: arginine, ornithine and proline interconversion | -2.811587 | 0.0012819 |
| SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I | -2.816353 | 0.0000083 |
| PWY-5863: superpathway of phylloquinol biosynthesis | -2.856290 | 0.0049809 |
| PWY-4981: L-proline biosynthesis II (from arginine) | -2.896603 | 0.0003088 |
| PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle | -2.936548 | 0.0006690 |
| PWY-5791: 1,4-dihydroxy-2-naphthoate biosynthesis II (plants) | -2.942201 | 0.0032297 |
| PWY-5837: 1,4-dihydroxy-2-naphthoate biosynthesis I | -2.942201 | 0.0032297 |
| THISYNARA-PWY: superpathway of thiamin diphosphate biosynthesis III (eukaryotes) | -3.008688 | 0.0032675 |
| PWY0-845: superpathway of pyridoxal 5’-phosphate biosynthesis and salvage | -3.046322 | 0.0001500 |
| PWY-5918: superpathay of heme biosynthesis from glutamate | -3.146932 | 0.0001546 |
| PWY-5989: stearate biosynthesis II (bacteria and plants) | -3.274229 | 0.0001892 |
| PWY-6168: flavin biosynthesis III (fungi) | -3.317752 | 0.0010314 |
| PENTOSE-P-PWY: pentose phosphate pathway | -3.403425 | 0.0097015 |
| PWY4FS-7: phosphatidylglycerol biosynthesis I (plastidic) | -3.410684 | 0.0031237 |
| HEMESYN2-PWY: heme biosynthesis II (anaerobic) | -3.444899 | 0.0040657 |
| PWY-6282: palmitoleate biosynthesis I (from (5Z)-dodec-5-enoate) | -3.469113 | 0.0001367 |
| PWY-5973: cis-vaccenate biosynthesis | -3.491723 | 0.0001673 |
| PWY-7282: 4-amino-2-methyl-5-phosphomethylpyrimidine biosynthesis (yeast) | -3.525053 | 0.0000645 |
| PWY-5899: superpathway of menaquinol-13 biosynthesis | -3.544246 | 0.0028078 |
| PWY-5897: superpathway of menaquinol-11 biosynthesis | -3.544246 | 0.0028078 |
| PWY-5898: superpathway of menaquinol-12 biosynthesis | -3.544246 | 0.0028078 |
| PWY-5121: superpathway of geranylgeranyl diphosphate biosynthesis II (via MEP) | -3.550032 | 0.0063343 |
| PWY-7392: taxadiene biosynthesis (engineered) | -3.555892 | 0.0033284 |
| FASYN-ELONG-PWY: fatty acid elongation – saturated | -3.580566 | 0.0001118 |
| NONMEVIPP-PWY: methylerythritol phosphate pathway I | -3.655068 | 0.0026956 |
| PWY4FS-8: phosphatidylglycerol biosynthesis II (non-plastidic) | -3.677122 | 0.0014006 |
| PWYG-321: mycolate biosynthesis | -3.696486 | 0.0001244 |
| DAPLYSINESYN-PWY: L-lysine biosynthesis I | -3.741257 | 0.0000892 |
| PWY-7664: oleate biosynthesis IV (anaerobic) | -3.755008 | 0.0001399 |
| PWY-5188: tetrapyrrole biosynthesis I (from glutamate) | -3.781072 | 0.0043172 |
| PWY0-781: aspartate superpathway | -3.801118 | 0.0000014 |
| PWY-5097: L-lysine biosynthesis VI | -3.809828 | 0.0071639 |
| ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) | -3.832655 | 0.0046513 |
| RIBOSYN2-PWY: flavin biosynthesis I (bacteria and plants) | -3.842746 | 0.0001349 |
| PWY-7560: methylerythritol phosphate pathway II | -3.843193 | 0.0003818 |
| PWY-5667: CDP-diacylglycerol biosynthesis I | -3.859761 | 0.0006152 |
| PHOSLIPSYN-PWY: superpathway of phospholipid biosynthesis I (bacteria) | -3.879878 | 0.0051632 |
| PWY-6270: isoprene biosynthesis I | -3.946063 | 0.0005631 |
| GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) | -3.961867 | 0.0022039 |
| PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing) | -4.030221 | 0.0049877 |
| PWY-7388: octanoyl-[acyl-carrier protein] biosynthesis (mitochondria, yeast) | -4.149346 | 0.0023237 |
| P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I | -4.555713 | 0.0005788 |
Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.
blbe = pathways.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe = blbe[blbe['Blood_Saliva_euk_p'] < 0.01]
blbe = blbe.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe = pd.concat([blbe.head(n=50), blbe.tail(n=50)])
blbe = blbe.groupby(by=blbe.index, axis=0).mean()
blbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | 6.1269895 | 0.0000746 |
| PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I | 5.9557212 | 0.0000960 |
| PWY-6700: queuosine biosynthesis | 5.8952182 | 0.0001747 |
| PWY-3841: folate transformations II | 5.8250691 | 0.0001884 |
| PWY-1042: glycolysis IV (plant cytosol) | 5.8248625 | 0.0006432 |
| PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II | 5.8126818 | 0.0001099 |
| PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) | 5.7677714 | 0.0011508 |
| 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis | 5.7645176 | 0.0003939 |
| PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) | 5.7581975 | 0.0006188 |
| PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) | 5.7565472 | 0.0004957 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | 5.7561276 | 0.0001316 |
| PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I | 5.7542654 | 0.0004848 |
| PWY-6151: S-adenosyl-L-methionine cycle I | 5.7277924 | 0.0003969 |
| PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) | 5.7020234 | 0.0001720 |
| PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis | 5.6901329 | 0.0005396 |
| PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II | 5.6901329 | 0.0005396 |
| ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) | 5.6882823 | 0.0004411 |
| PWY-5695: urate biosynthesis/inosine 5’-phosphate degradation | 5.6795194 | 0.0003990 |
| PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I | 5.6568938 | 0.0001681 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | 5.6551977 | 0.0001406 |
| PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I | 5.6263061 | 0.0008668 |
| PWY66-400: glycolysis VI (metazoan) | 5.6234895 | 0.0005515 |
| GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) | 5.6136348 | 0.0003154 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | 5.6119986 | 0.0001709 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | 5.6119986 | 0.0001709 |
| PWY-6385: peptidoglycan biosynthesis III (mycobacteria) | 5.6106221 | 0.0012744 |
| PWY-5686: UMP biosynthesis | 5.6030538 | 0.0003824 |
| PWY-841: superpathway of purine nucleotides de novo biosynthesis I | 5.5904864 | 0.0004242 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | 5.5843868 | 0.0001492 |
| PWY-2942: L-lysine biosynthesis III | 5.5585510 | 0.0003132 |
| COA-PWY-1: coenzyme A biosynthesis II (mammalian) | 5.5463333 | 0.0010274 |
| PWY-7539: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis III (Chlamydia) | 5.5353641 | 0.0007482 |
| NONMEVIPP-PWY: methylerythritol phosphate pathway I | 5.5236915 | 0.0018069 |
| DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II | 5.5117418 | 0.0004067 |
| PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II | 5.5071869 | 0.0003277 |
| TRNA-CHARGING-PWY: tRNA charging | 5.4954667 | 0.0004910 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | 5.4938084 | 0.0000965 |
| VALSYN-PWY: L-valine biosynthesis | 5.4938084 | 0.0000965 |
| PWY-5097: L-lysine biosynthesis VI | 5.4857195 | 0.0003747 |
| PWY-5100: pyruvate fermentation to acetate and lactate II | 5.4746135 | 0.0002767 |
| PWY-7208: superpathway of pyrimidine nucleobases salvage | 5.4565280 | 0.0002225 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | 5.4425775 | 0.0003163 |
| PWY-7111: pyruvate fermentation to isobutanol (engineered) | 5.4019701 | 0.0000882 |
| BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis | 5.3840107 | 0.0002105 |
| CALVIN-PWY: Calvin-Benson-Bassham cycle | 5.3717316 | 0.0002489 |
| ANAEROFRUCAT-PWY: homolactic fermentation | 5.3672958 | 0.0005483 |
| PWY-5103: L-isoleucine biosynthesis III | 5.3424743 | 0.0002371 |
| PWY-6168: flavin biosynthesis III (fungi) | 5.3365914 | 0.0021062 |
| DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I | 5.3168256 | 0.0007737 |
| PWY-6123: inosine-5’-phosphate biosynthesis I | 5.2887943 | 0.0002369 |
| PWY66-422: D-galactose degradation V (Leloir pathway) | 4.2985076 | 0.0019566 |
| PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I | 4.2677020 | 0.0037708 |
| P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I | 4.2672481 | 0.0007192 |
| PWY-5121: superpathway of geranylgeranyl diphosphate biosynthesis II (via MEP) | 4.2220106 | 0.0013731 |
| PWY-6897: thiamin salvage II | 4.1832183 | 0.0071123 |
| PWY4FS-7: phosphatidylglycerol biosynthesis I (plastidic) | 4.1814961 | 0.0026984 |
| PWY4FS-8: phosphatidylglycerol biosynthesis II (non-plastidic) | 4.1421538 | 0.0026989 |
| PWY66-389: phytol degradation | 4.1180691 | 0.0003474 |
| PWY-6317: galactose degradation I (Leloir pathway) | 4.1029088 | 0.0029584 |
| HISTSYN-PWY: L-histidine biosynthesis | 4.0927985 | 0.0007152 |
| HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) | 4.0692858 | 0.0005075 |
| PWY-7282: 4-amino-2-methyl-5-phosphomethylpyrimidine biosynthesis (yeast) | 4.0662574 | 0.0004197 |
| GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) | 4.0442863 | 0.0020580 |
| P441-PWY: superpathway of N-acetylneuraminate degradation | 4.0423428 | 0.0013078 |
| PENTOSE-P-PWY: pentose phosphate pathway | 4.0263385 | 0.0004375 |
| PWY0-1261: anhydromuropeptides recycling | 4.0238160 | 0.0012984 |
| PWY-5918: superpathay of heme biosynthesis from glutamate | 3.9569128 | 0.0012311 |
| PWY0-781: aspartate superpathway | 3.9424335 | 0.0008994 |
| PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type | 3.9019110 | 0.0013498 |
| ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) | 3.8135193 | 0.0007451 |
| PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) | 3.7855839 | 0.0018684 |
| PWY0-1297: superpathway of purine deoxyribonucleosides degradation | 3.7095790 | 0.0002252 |
| PWY0-845: superpathway of pyridoxal 5’-phosphate biosynthesis and salvage | 3.6582842 | 0.0013928 |
| TCA: TCA cycle I (prokaryotic) | 3.6566818 | 0.0008608 |
| FERMENTATION-PWY: mixed acid fermentation | 3.6134686 | 0.0013447 |
| HEXITOLDEGSUPER-PWY: superpathway of hexitol degradation (bacteria) | 3.5642268 | 0.0011384 |
| PWY0-1298: superpathway of pyrimidine deoxyribonucleosides degradation | 3.5336018 | 0.0006256 |
| PWY-6471: peptidoglycan biosynthesis IV (Enterococcus faecium) | 3.5015074 | 0.0026748 |
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | 3.4426575 | 0.0000269 |
| PYRIDOXSYN-PWY: pyridoxal 5’-phosphate biosynthesis I | 3.4231248 | 0.0043206 |
| GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff | 3.4177244 | 0.0006304 |
| PWY-5913: TCA cycle VI (obligate autotrophs) | 3.4130841 | 0.0026517 |
| P161-PWY: acetylene degradation | 3.3375681 | 0.0003007 |
| PWY-4981: L-proline biosynthesis II (from arginine) | 3.3333144 | 0.0024192 |
| GLCMANNANAUT-PWY: superpathway of N-acetylglucosamine, N-acetylmannosamine and N-acetylneuraminate degradation | 3.2463208 | 0.0086455 |
| PWY0-1415: superpathway of heme biosynthesis from uroporphyrinogen-III | 2.9864453 | 0.0012454 |
| PWY-5690: TCA cycle II (plants and fungi) | 2.9768770 | 0.0001744 |
| P122-PWY: heterolactic fermentation | 2.8025947 | 0.0035444 |
| ARGORNPROST-PWY: arginine, ornithine and proline interconversion | 2.8017007 | 0.0048207 |
| PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) | 2.7348747 | 0.0024793 |
| PWY-5173: superpathway of acetyl-CoA biosynthesis | 2.7070859 | 0.0011152 |
| PWY-6803: phosphatidylcholine acyl editing | 2.6808764 | 0.0003176 |
| SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis | 2.5335437 | 0.0035800 |
| PPGPPMET-PWY: ppGpp biosynthesis | 2.4945955 | 0.0059352 |
| PWY0-1479: tRNA processing | 2.4669272 | 0.0017515 |
| PWY-5675: nitrate reduction V (assimilatory) | 2.3503180 | 0.0002165 |
| PWY-7115: C4 photosynthetic carbon assimilation cycle, NAD-ME type | 2.3283740 | 0.0027060 |
| PWY0-1061: superpathway of L-alanine biosynthesis | 2.2840671 | 0.0051978 |
| PWY-4041: γ-glutamyl cycle | 1.9487025 | 0.0087551 |
| PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) | -0.3234633 | 0.0088135 |
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
blse = pathways.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse = blse[blse['Blood_Buccal_euk_p'] < 0.01]
blse = blse.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse = pd.concat([blse.head(n=50), blse.tail(n=50)])
blse = blse.groupby(by=blse.index, axis=0).mean()
blse.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | 4.082921 | 0.0000073 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | 3.944994 | 0.0000159 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | 3.944994 | 0.0000159 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | 3.490060 | 0.0001084 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | 3.447454 | 0.0001879 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | 3.315561 | 0.0002034 |
| VALSYN-PWY: L-valine biosynthesis | 3.297722 | 0.0000880 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | 3.297722 | 0.0000880 |
| PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I | 3.107286 | 0.0077383 |
| PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II | 3.100352 | 0.0051319 |
| PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis | 3.100352 | 0.0051319 |
| PWY-6609: adenine and adenosine salvage III | 3.068784 | 0.0002464 |
| PWY66-400: glycolysis VI (metazoan) | 2.818712 | 0.0003861 |
| UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I | 2.659206 | 0.0045700 |
| PWY-6124: inosine-5’-phosphate biosynthesis II | 2.523542 | 0.0044726 |
| GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) | 2.276063 | 0.0021645 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | 1.884414 | 0.0032187 |
| SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I | 1.049353 | 0.0000346 |
| PWY-3781: aerobic respiration I (cytochrome c) | -3.487774 | 0.0023437 |
pathways = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/pathways.csv', header=0, index_col=0)
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
bs3 = pathways.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs3 = bs3[bs3['Saliva_Blood_p'] < 0.01]
bs3 = bs3.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs3 = pd.concat([bs3.head(n=50), bs3.tail(n=50)])
bs3 = bs3.groupby(by=bs3.index, axis=0).mean()
bs3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) | 1.369967 | 0.0015278 |
| PWY66-201: nicotine degradation IV | -1.339120 | 0.0066475 |
| PWY-6823: molybdenum cofactor biosynthesis | -1.550877 | 0.0000889 |
| PWY-6595: superpathway of guanosine nucleotides degradation (plants) | -1.576811 | 0.0005615 |
| PWY-7031: protein N-glycosylation (bacterial) | -1.601721 | 0.0066482 |
| GLUCUROCAT-PWY: superpathway of β-D-glucuronide and D-glucuronate degradation | -1.698105 | 0.0057003 |
| KETOGLUCONMET-PWY: ketogluconate metabolism | -1.776304 | 0.0004196 |
| PWY-6891: thiazole biosynthesis II (Bacillus) | -1.820555 | 0.0097794 |
| PWY-6749: CMP-legionaminate biosynthesis I | -1.882757 | 0.0009012 |
| PWY-7242: D-fructuronate degradation | -2.076551 | 0.0021045 |
| PWY-6507: 4-deoxy-L-threo-hex-4-enopyranuronate degradation | -2.118720 | 0.0024219 |
| PWY-6992: 1,5-anhydrofructose degradation | -2.156391 | 0.0001124 |
| PWY-6748: nitrate reduction VII (denitrification) | -2.373015 | 0.0004265 |
| PWY-5022: 4-aminobutanoate degradation V | -2.375872 | 0.0040979 |
| PWY-6708: ubiquinol-8 biosynthesis (prokaryotic) | -2.475866 | 0.0011876 |
| PWY-5857: ubiquinol-10 biosynthesis (prokaryotic) | -2.475866 | 0.0011876 |
| PWY-5856: ubiquinol-9 biosynthesis (prokaryotic) | -2.475866 | 0.0011876 |
| PWY-5855: ubiquinol-7 biosynthesis (prokaryotic) | -2.475866 | 0.0011876 |
| GOLPDLCAT-PWY: superpathway of glycerol degradation to 1,3-propanediol | -2.543814 | 0.0007921 |
| PWY-7210: pyrimidine deoxyribonucleotides biosynthesis from CTP | -2.683044 | 0.0002874 |
| PWY3O-355: stearate biosynthesis III (fungi) | -2.702749 | 0.0007630 |
| METHGLYUT-PWY: superpathway of methylglyoxal degradation | -2.706670 | 0.0000037 |
| PWY-5675: nitrate reduction V (assimilatory) | -2.761991 | 0.0000679 |
| PWY-5676: acetyl-CoA fermentation to butanoate II | -2.807641 | 0.0000129 |
| PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II | -2.833581 | 0.0047793 |
| P125-PWY: superpathway of (R,R)-butanediol biosynthesis | -2.837213 | 0.0000549 |
| P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) | -2.885472 | 0.0000033 |
| DENITRIFICATION-PWY: nitrate reduction I (denitrification) | -2.986749 | 0.0004611 |
| PWY66-389: phytol degradation | -2.988662 | 0.0000608 |
| PWY-5994: palmitate biosynthesis I (animals and fungi) | -3.106828 | 0.0012030 |
| PWY-6892: thiazole biosynthesis I (E. coli) | -3.190007 | 0.0001594 |
| PWY-4702: phytate degradation I | -3.223278 | 0.0006150 |
| REDCITCYC: TCA cycle VIII (helicobacter) | -3.234659 | 0.0001317 |
| P161-PWY: acetylene degradation | -3.239657 | 0.0000163 |
| PWY-7254: TCA cycle VII (acetate-producers) | -3.250783 | 0.0001497 |
| PWY-5083: NAD/NADH phosphorylation and dephosphorylation | -3.274292 | 0.0005809 |
| ARGORNPROST-PWY: arginine, ornithine and proline interconversion | -3.346238 | 0.0002088 |
| PWY-5367: petroselinate biosynthesis | -3.364139 | 0.0047718 |
| PWY4LZ-257: superpathway of fermentation (Chlamydomonas reinhardtii) | -3.382913 | 0.0000188 |
| PWY-5723: Rubisco shunt | -3.516555 | 0.0001887 |
| PWY-7385: 1,3-propanediol biosynthesis (engineered) | -3.546559 | 0.0001336 |
| PWY-7003: glycerol degradation to butanol | -3.558627 | 0.0002047 |
| SO4ASSIM-PWY: sulfate reduction I (assimilatory) | -3.603495 | 0.0021792 |
| PWY-7456: mannan degradation | -3.612446 | 0.0000491 |
| PWY-6606: guanosine nucleotides degradation II | -3.629806 | 0.0005633 |
| PWY-922: mevalonate pathway I | -3.678726 | 0.0000316 |
| P122-PWY: heterolactic fermentation | -3.710629 | 0.0000636 |
| PWY-5384: sucrose degradation IV (sucrose phosphorylase) | -3.749889 | 0.0001721 |
| PWY-6588: pyruvate fermentation to acetone | -3.750906 | 0.0030886 |
| PWY-5265: peptidoglycan biosynthesis II (staphylococci) | -3.817206 | 0.0002463 |
| PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing) | -7.066243 | 0.0000015 |
| PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II | -7.066540 | 0.0000008 |
| PWY-3001: superpathway of L-isoleucine biosynthesis I | -7.081882 | 0.0000011 |
| PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I | -7.096266 | 0.0000063 |
| PWY-5973: cis-vaccenate biosynthesis | -7.109034 | 0.0000015 |
| PWY-5103: L-isoleucine biosynthesis III | -7.117862 | 0.0000006 |
| BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis | -7.132368 | 0.0000006 |
| PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I | -7.139054 | 0.0000007 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | -7.178015 | 0.0000006 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | -7.178015 | 0.0000006 |
| PWY-4242: pantothenate and coenzyme A biosynthesis III | -7.188008 | 0.0000027 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | -7.197799 | 0.0000007 |
| PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) | -7.222713 | 0.0000009 |
| ARO-PWY: chorismate biosynthesis I | -7.231900 | 0.0000016 |
| CALVIN-PWY: Calvin-Benson-Bassham cycle | -7.242959 | 0.0000031 |
| COA-PWY: coenzyme A biosynthesis I | -7.250466 | 0.0000027 |
| COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis | -7.251726 | 0.0000013 |
| PWY-6163: chorismate biosynthesis from 3-dehydroquinate | -7.280855 | 0.0000021 |
| DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II | -7.282089 | 0.0000007 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | -7.285223 | 0.0000008 |
| PWY-6151: S-adenosyl-L-methionine cycle I | -7.288070 | 0.0000005 |
| VALSYN-PWY: L-valine biosynthesis | -7.294140 | 0.0000004 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | -7.294140 | 0.0000004 |
| ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) | -7.320729 | 0.0000011 |
| PWY-6700: queuosine biosynthesis | -7.322231 | 0.0000025 |
| COA-PWY-1: coenzyme A biosynthesis II (mammalian) | -7.335901 | 0.0000013 |
| PWY-6124: inosine-5’-phosphate biosynthesis II | -7.335926 | 0.0000015 |
| PWY-841: superpathway of purine nucleotides de novo biosynthesis I | -7.345385 | 0.0000009 |
| PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II | -7.381062 | 0.0000005 |
| PWY-6609: adenine and adenosine salvage III | -7.383676 | 0.0000011 |
| PWY-1042: glycolysis IV (plant cytosol) | -7.390269 | 0.0000024 |
| PWY-6123: inosine-5’-phosphate biosynthesis I | -7.391219 | 0.0000013 |
| PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I | -7.398170 | 0.0000013 |
| 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis | -7.398277 | 0.0000015 |
| PWY-5695: urate biosynthesis/inosine 5’-phosphate degradation | -7.436938 | 0.0000015 |
| TRNA-CHARGING-PWY: tRNA charging | -7.457226 | 0.0000019 |
| PWY-5686: UMP biosynthesis | -7.461998 | 0.0000017 |
| PWY-7111: pyruvate fermentation to isobutanol (engineered) | -7.474423 | 0.0000003 |
| PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis | -7.478766 | 0.0000007 |
| PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II | -7.478766 | 0.0000007 |
| PWY-3841: folate transformations II | -7.498662 | 0.0000022 |
| PWY-6385: peptidoglycan biosynthesis III (mycobacteria) | -7.510429 | 0.0000021 |
| PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I | -7.513373 | 0.0000006 |
| PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) | -7.522371 | 0.0000017 |
| PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) | -7.525967 | 0.0000018 |
| PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) | -7.543174 | 0.0000020 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | -7.577720 | 0.0000018 |
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | -7.685244 | 0.0000015 |
| UNINTEGRATED | -18.392918 | 0.0000000 |
| UNMAPPED | -19.270640 | 0.0000000 |
Negative fold changes are higher in buccal samples while positive are higher in blood samples.
bb3 = pathways.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb3 = bb3[bb3['Buccal_Blood_p'] < 0.01]
bb3 = bb3.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb3 = pd.concat([bb3.head(n=50), bb3.tail(n=50)])
bb3 = bb3.groupby(by=bb3.index, axis=0).mean()
bb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| GLUCOSE1PMETAB-PWY: glucose and glucose-1-phosphate degradation | -1.953539 | 0.0012949 |
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | -2.071518 | 0.0000388 |
| HEMESYN2-PWY: heme biosynthesis II (anaerobic) | -2.375301 | 0.0004377 |
| PWY-5384: sucrose degradation IV (sucrose phosphorylase) | -2.491040 | 0.0019229 |
| PPGPPMET-PWY: ppGpp biosynthesis | -2.814633 | 0.0000632 |
| PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I | -2.855298 | 0.0018581 |
| P461-PWY: hexitol fermentation to lactate, formate, ethanol and acetate | -2.910877 | 0.0022583 |
| PWY-4981: L-proline biosynthesis II (from arginine) | -3.099472 | 0.0000642 |
| PANTO-PWY: phosphopantothenate biosynthesis I | -3.135412 | 0.0082527 |
| PANTOSYN-PWY: pantothenate and coenzyme A biosynthesis I | -3.202533 | 0.0052060 |
| ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) | -3.317952 | 0.0004671 |
| PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) | -3.350229 | 0.0000992 |
| ARGSYN-PWY: L-arginine biosynthesis I (via L-ornithine) | -3.399892 | 0.0011887 |
| HEXITOLDEGSUPER-PWY: superpathway of hexitol degradation (bacteria) | -3.577781 | 0.0022764 |
| PWY-621: sucrose degradation III (sucrose invertase) | -3.583786 | 0.0030284 |
| POLYISOPRENSYN-PWY: polyisoprenoid biosynthesis (E. coli) | -3.616207 | 0.0021288 |
| PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV | -3.708284 | 0.0006673 |
| P161-PWY: acetylene degradation | -3.725179 | 0.0002659 |
| PWY-3781: aerobic respiration I (cytochrome c) | -3.786589 | 0.0000728 |
| PWY-7196: superpathway of pyrimidine ribonucleosides salvage | -3.812389 | 0.0016488 |
| HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) | -3.992538 | 0.0000650 |
| THISYNARA-PWY: superpathway of thiamin diphosphate biosynthesis III (eukaryotes) | -4.014917 | 0.0002446 |
| PWY0-1297: superpathway of purine deoxyribonucleosides degradation | -4.148703 | 0.0002220 |
| PWY-5188: tetrapyrrole biosynthesis I (from glutamate) | -4.198054 | 0.0000469 |
| NAD-BIOSYNTHESIS-II: NAD salvage pathway II | -4.322622 | 0.0003753 |
| SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I | -4.347729 | 0.0005945 |
| PWY-5659: GDP-mannose biosynthesis | -4.412045 | 0.0003699 |
| PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type | -4.422857 | 0.0005355 |
| PWY66-409: superpathway of purine nucleotide salvage | -4.433302 | 0.0003692 |
| PWY-6897: thiamin salvage II | -4.438291 | 0.0018318 |
| P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle) | -4.440537 | 0.0015759 |
| PWY-6168: flavin biosynthesis III (fungi) | -4.483201 | 0.0004274 |
| PWY-5989: stearate biosynthesis II (bacteria and plants) | -4.567455 | 0.0018054 |
| PENTOSE-P-PWY: pentose phosphate pathway | -4.570613 | 0.0005848 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | -4.623735 | 0.0000903 |
| PWY-5667: CDP-diacylglycerol biosynthesis I | -4.660401 | 0.0000939 |
| PWY-7388: octanoyl-[acyl-carrier protein] biosynthesis (mitochondria, yeast) | -4.668495 | 0.0034495 |
| FASYN-INITIAL-PWY: superpathway of fatty acid biosynthesis initiation (E. coli) | -4.695887 | 0.0047357 |
| ANAEROFRUCAT-PWY: homolactic fermentation | -4.712941 | 0.0001589 |
| P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I | -4.728063 | 0.0005894 |
| PWY-7539: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis III (Chlamydia) | -4.732397 | 0.0006326 |
| PWY-7357: thiamin formation from pyrithiamine and oxythiamine (yeast) | -4.793122 | 0.0014301 |
| PWY0-1298: superpathway of pyrimidine deoxyribonucleosides degradation | -4.795474 | 0.0012426 |
| DAPLYSINESYN-PWY: L-lysine biosynthesis I | -4.817453 | 0.0000988 |
| PWY-5097: L-lysine biosynthesis VI | -4.903941 | 0.0001113 |
| PWY0-862: (5Z)-dodec-5-enoate biosynthesis | -4.916194 | 0.0010235 |
| PWY-6282: palmitoleate biosynthesis I (from (5Z)-dodec-5-enoate) | -4.924639 | 0.0010046 |
| HOMOSER-METSYN-PWY: L-methionine biosynthesis I | -5.004194 | 0.0009653 |
| PWY0-162: superpathway of pyrimidine ribonucleotides de novo biosynthesis | -5.006905 | 0.0009292 |
| DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I | -5.025458 | 0.0004047 |
| PWY-3841: folate transformations II | -5.772760 | 0.0002901 |
| PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) | -5.777016 | 0.0002195 |
| COA-PWY-1: coenzyme A biosynthesis II (mammalian) | -5.784816 | 0.0001797 |
| PWY-6385: peptidoglycan biosynthesis III (mycobacteria) | -5.786133 | 0.0002306 |
| PWY-6700: queuosine biosynthesis | -5.788170 | 0.0003525 |
| 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis | -5.805661 | 0.0013852 |
| THRESYN-PWY: superpathway of L-threonine biosynthesis | -5.808601 | 0.0005263 |
| PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) | -5.811435 | 0.0002288 |
| PWY66-400: glycolysis VI (metazoan) | -5.820233 | 0.0002662 |
| ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) | -5.827153 | 0.0000650 |
| COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis | -5.843970 | 0.0006464 |
| PWY-7197: pyrimidine deoxyribonucleotide phosphorylation | -5.861724 | 0.0021314 |
| PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) | -5.864866 | 0.0003747 |
| TRPSYN-PWY: L-tryptophan biosynthesis | -5.887985 | 0.0005632 |
| PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing) | -5.916768 | 0.0005520 |
| PWY-6737: starch degradation V | -5.930443 | 0.0034960 |
| PWY-3001: superpathway of L-isoleucine biosynthesis I | -5.946788 | 0.0005302 |
| PWY-6151: S-adenosyl-L-methionine cycle I | -5.948886 | 0.0002898 |
| PWY-6936: seleno-amino acid biosynthesis | -5.970942 | 0.0000997 |
| PWY-7199: pyrimidine deoxyribonucleosides salvage | -6.001521 | 0.0051249 |
| PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II | -6.006174 | 0.0006348 |
| PWY0-1296: purine ribonucleosides degradation | -6.017901 | 0.0002293 |
| PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) | -6.040152 | 0.0006188 |
| PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I | -6.054700 | 0.0010350 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | -6.076546 | 0.0000988 |
| TRNA-CHARGING-PWY: tRNA charging | -6.078022 | 0.0002090 |
| BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis | -6.103833 | 0.0006245 |
| PWY-5103: L-isoleucine biosynthesis III | -6.103833 | 0.0007868 |
| PWY-7208: superpathway of pyrimidine nucleobases salvage | -6.114675 | 0.0010667 |
| PWY-7234: inosine-5’-phosphate biosynthesis III | -6.126006 | 0.0005278 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | -6.126660 | 0.0005909 |
| PWY-6124: inosine-5’-phosphate biosynthesis II | -6.127083 | 0.0002281 |
| PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I | -6.131049 | 0.0001478 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | -6.140068 | 0.0004916 |
| PWY-841: superpathway of purine nucleotides de novo biosynthesis I | -6.150735 | 0.0003557 |
| PWY-6123: inosine-5’-phosphate biosynthesis I | -6.151446 | 0.0003128 |
| PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II | -6.151560 | 0.0001069 |
| PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis | -6.151560 | 0.0001069 |
| DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II | -6.166463 | 0.0003280 |
| VALSYN-PWY: L-valine biosynthesis | -6.271443 | 0.0000911 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | -6.271443 | 0.0000911 |
| PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II | -6.361994 | 0.0001041 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | -6.379021 | 0.0000885 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | -6.379021 | 0.0000885 |
| PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I | -6.439987 | 0.0000954 |
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | -6.482576 | 0.0000908 |
| PWY-7111: pyruvate fermentation to isobutanol (engineered) | -6.483689 | 0.0000938 |
| PWY-6609: adenine and adenosine salvage III | -6.888554 | 0.0018737 |
| UNINTEGRATED | -17.960046 | 0.0000000 |
| UNMAPPED | -19.493154 | 0.0000000 |
Negative fold changes are higher in saliva samples while positive are higher in buccal samples.
sb3 = pathways.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb3 = sb3[sb3['Saliva_Buccal_p'] < 0.01]
sb3 = sb3.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb3 = pd.concat([sb3.head(n=50), sb3.tail(n=50)])
sb3 = sb3.groupby(by=sb3.index, axis=0).mean()
sb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) | 1.369967 | 0.0016945 |
| PWY66-201: nicotine degradation IV | -1.339120 | 0.0066475 |
| PWY-7031: protein N-glycosylation (bacterial) | -1.601721 | 0.0066482 |
| GLUCUROCAT-PWY: superpathway of β-D-glucuronide and D-glucuronate degradation | -1.698105 | 0.0057003 |
| PWY-6749: CMP-legionaminate biosynthesis I | -1.882757 | 0.0009012 |
| PWY66-389: phytol degradation | -2.000471 | 0.0082854 |
| PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) | -2.073991 | 0.0015441 |
| PWY-7242: D-fructuronate degradation | -2.076551 | 0.0021045 |
| PWY-5097: L-lysine biosynthesis VI | -2.084571 | 0.0081699 |
| PWY-6507: 4-deoxy-L-threo-hex-4-enopyranuronate degradation | -2.118720 | 0.0024219 |
| PWY-6992: 1,5-anhydrofructose degradation | -2.156391 | 0.0001299 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | -2.195773 | 0.0058462 |
| ANAEROFRUCAT-PWY: homolactic fermentation | -2.206504 | 0.0074108 |
| HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) | -2.217211 | 0.0042655 |
| PWY-7328: superpathway of UDP-glucose-derived O-antigen building blocks biosynthesis | -2.230378 | 0.0077247 |
| PWY-5667: CDP-diacylglycerol biosynthesis I | -2.280912 | 0.0054802 |
| PWY-6748: nitrate reduction VII (denitrification) | -2.373015 | 0.0004265 |
| PWY-5188: tetrapyrrole biosynthesis I (from glutamate) | -2.397191 | 0.0038231 |
| PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I | -2.418632 | 0.0083616 |
| PWY-3781: aerobic respiration I (cytochrome c) | -2.426190 | 0.0041991 |
| PWY-6708: ubiquinol-8 biosynthesis (prokaryotic) | -2.475866 | 0.0012813 |
| PWY-5857: ubiquinol-10 biosynthesis (prokaryotic) | -2.475866 | 0.0012813 |
| PWY-5856: ubiquinol-9 biosynthesis (prokaryotic) | -2.475866 | 0.0012813 |
| PWY-5855: ubiquinol-7 biosynthesis (prokaryotic) | -2.475866 | 0.0012813 |
| PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV | -2.559669 | 0.0079899 |
| ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) | -2.598809 | 0.0084929 |
| PWY-4981: L-proline biosynthesis II (from arginine) | -2.601973 | 0.0016975 |
| PWY0-1415: superpathway of heme biosynthesis from uroporphyrinogen-III | -2.615203 | 0.0064639 |
| PWY0-1241: ADP-L-glycero-β-D-manno-heptose biosynthesis | -2.700916 | 0.0085394 |
| PWY3O-355: stearate biosynthesis III (fungi) | -2.702749 | 0.0020314 |
| HEMESYN2-PWY: heme biosynthesis II (anaerobic) | -2.724787 | 0.0091285 |
| REDCITCYC: TCA cycle VIII (helicobacter) | -2.758828 | 0.0039355 |
| PWY-7254: TCA cycle VII (acetate-producers) | -2.760869 | 0.0052888 |
| PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II | -2.833581 | 0.0047793 |
| P108-PWY: pyruvate fermentation to propanoate I | -2.846079 | 0.0028413 |
| P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) | -2.885472 | 0.0003474 |
| PWY-2723: trehalose degradation V | -2.931790 | 0.0084902 |
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | -2.942390 | 0.0000997 |
| DENITRIFICATION-PWY: nitrate reduction I (denitrification) | -2.986749 | 0.0032300 |
| PWY-5994: palmitate biosynthesis I (animals and fungi) | -3.106828 | 0.0012030 |
| PPGPPMET-PWY: ppGpp biosynthesis | -3.146512 | 0.0012225 |
| PWY-6892: thiazole biosynthesis I (E. coli) | -3.190007 | 0.0037629 |
| PWY-4702: phytate degradation I | -3.223278 | 0.0006150 |
| PWY-6969: TCA cycle V (2-oxoglutarate:ferredoxin oxidoreductase) | -3.235096 | 0.0046632 |
| ARGORNPROST-PWY: arginine, ornithine and proline interconversion | -3.346238 | 0.0002088 |
| PWY-5690: TCA cycle II (plants and fungi) | -3.370347 | 0.0059353 |
| PANTO-PWY: phosphopantothenate biosynthesis I | -3.511841 | 0.0058816 |
| PWY-7385: 1,3-propanediol biosynthesis (engineered) | -3.546559 | 0.0003270 |
| PWY-7456: mannan degradation | -3.612446 | 0.0000491 |
| PANTOSYN-PWY: pantothenate and coenzyme A biosynthesis I | -3.619359 | 0.0063882 |
| PWY-6803: phosphatidylcholine acyl editing | -3.633404 | 0.0008398 |
| GLUCONEO-PWY: gluconeogenesis I | -3.646638 | 0.0079964 |
| COBALSYN-PWY: adenosylcobalamin salvage from cobinamide I | -3.673371 | 0.0053391 |
| CITRULBIO-PWY: L-citrulline biosynthesis | -3.737103 | 0.0051830 |
| NAGLIPASYN-PWY: lipid IVA biosynthesis | -3.823621 | 0.0055214 |
| PWY-5863: superpathway of phylloquinol biosynthesis | -3.910849 | 0.0071380 |
| PWY-5189: tetrapyrrole biosynthesis II (from glycine) | -3.915771 | 0.0038738 |
| PWY-6263: superpathway of menaquinol-8 biosynthesis II | -3.919003 | 0.0005096 |
| PWY-5104: L-isoleucine biosynthesis IV | -3.970444 | 0.0050378 |
| PWY-6901: superpathway of glucose and xylose degradation | -4.000096 | 0.0000287 |
| P164-PWY: purine nucleobases degradation I (anaerobic) | -4.051673 | 0.0052106 |
| POLYAMSYN-PWY: superpathway of polyamine biosynthesis I | -4.099309 | 0.0000917 |
| HISDEG-PWY: L-histidine degradation I | -4.126351 | 0.0039741 |
| PWY-5920: superpathway of heme biosynthesis from glycine | -4.135103 | 0.0012035 |
| PYRIDNUCSYN-PWY: NAD biosynthesis I (from aspartate) | -4.257678 | 0.0047888 |
| PWY-4984: urea cycle | -4.266707 | 0.0037367 |
| PWY-7115: C4 photosynthetic carbon assimilation cycle, NAD-ME type | -4.324324 | 0.0040105 |
| PWY-622: starch biosynthesis | -4.431540 | 0.0043576 |
| P42-PWY: incomplete reductive TCA cycle | -4.461541 | 0.0017764 |
| P23-PWY: reductive TCA cycle I | -4.472183 | 0.0001604 |
| PWY-5030: L-histidine degradation III | -4.472589 | 0.0070466 |
| PWY-6305: putrescine biosynthesis IV | -4.638622 | 0.0007453 |
| ARG+POLYAMINE-SYN: superpathway of arginine and polyamine biosynthesis | -4.767626 | 0.0000078 |
| PWY0-881: superpathway of fatty acid biosynthesis I (E. coli) | -4.795367 | 0.0017388 |
| COLANSYN-PWY: colanic acid building blocks biosynthesis | -4.879777 | 0.0003081 |
| PWY-6545: pyrimidine deoxyribonucleotides de novo biosynthesis III | -4.962008 | 0.0013138 |
| GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff | -4.993373 | 0.0036655 |
| THISYN-PWY: superpathway of thiamin diphosphate biosynthesis I | -5.130474 | 0.0059325 |
| PWY-241: C4 photosynthetic carbon assimilation cycle, NADP-ME type | -5.381551 | 0.0006837 |
| PWY-5840: superpathway of menaquinol-7 biosynthesis | -5.806246 | 0.0039684 |
| PWY-7200: superpathway of pyrimidine deoxyribonucleoside salvage | -6.019793 | 0.0000074 |
Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.
ss3 = pathways.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss3 = ss3[ss3['Saliva_Saliva_euk_p'] < 0.01]
ss3 = ss3.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss3 = pd.concat([ss3.head(n=50), ss3.tail(n=50)])
ss3 = ss3.groupby(by=ss3.index, axis=0).mean()
ss3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | -1.370885 | 0.0087048 |
| KETOGLUCONMET-PWY: ketogluconate metabolism | -1.776304 | 0.0026801 |
| PWY-5675: nitrate reduction V (assimilatory) | -2.236857 | 0.0068771 |
| P125-PWY: superpathway of (R,R)-butanediol biosynthesis | -2.559305 | 0.0020286 |
| METHGLYUT-PWY: superpathway of methylglyoxal degradation | -2.706670 | 0.0094885 |
| PWY-5676: acetyl-CoA fermentation to butanoate II | -2.771931 | 0.0070687 |
| P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) | -2.885472 | 0.0000033 |
| PWY-6901: superpathway of glucose and xylose degradation | -3.022929 | 0.0067578 |
| PWY-5994: palmitate biosynthesis I (animals and fungi) | -3.106828 | 0.0012030 |
| PWY-5265: peptidoglycan biosynthesis II (staphylococci) | -3.817206 | 0.0002463 |
Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.
bbe3 = pathways.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe3 = bbe3[bbe3['Buccal_Buccal_euk_p'] < 0.01]
bbe3 = bbe3.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe3 = pd.concat([bbe3.head(n=50), bbe3.tail(n=50)])
bbe3 = bbe3.groupby(by=bbe3.index, axis=0).mean()
bbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | -2.071518 | 0.0000388 |
| HEMESYN2-PWY: heme biosynthesis II (anaerobic) | -2.375301 | 0.0004377 |
| PWY-5384: sucrose degradation IV (sucrose phosphorylase) | -2.491040 | 0.0019229 |
| PPGPPMET-PWY: ppGpp biosynthesis | -2.814633 | 0.0000632 |
| PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I | -2.855298 | 0.0018581 |
| PWY-4981: L-proline biosynthesis II (from arginine) | -3.099472 | 0.0008894 |
| ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) | -3.317952 | 0.0080073 |
| PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) | -3.350229 | 0.0064698 |
| POLYISOPRENSYN-PWY: polyisoprenoid biosynthesis (E. coli) | -3.616207 | 0.0092780 |
| PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV | -3.708284 | 0.0069478 |
| PWY-3781: aerobic respiration I (cytochrome c) | -3.786589 | 0.0084026 |
| PWY-7196: superpathway of pyrimidine ribonucleosides salvage | -3.812389 | 0.0016488 |
| HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) | -3.992538 | 0.0046983 |
| SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I | -4.135579 | 0.0035870 |
| PWY-5188: tetrapyrrole biosynthesis I (from glutamate) | -4.198054 | 0.0008200 |
| PWY-5097: L-lysine biosynthesis VI | -4.226292 | 0.0065865 |
| NAD-BIOSYNTHESIS-II: NAD salvage pathway II | -4.322622 | 0.0003753 |
| PWY-5659: GDP-mannose biosynthesis | -4.412045 | 0.0003873 |
| PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type | -4.422857 | 0.0005355 |
| PWY66-409: superpathway of purine nucleotide salvage | -4.433302 | 0.0003692 |
| P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle) | -4.440537 | 0.0015759 |
| PWY-6168: flavin biosynthesis III (fungi) | -4.483201 | 0.0016972 |
| PENTOSE-P-PWY: pentose phosphate pathway | -4.570613 | 0.0005848 |
| PWY-5667: CDP-diacylglycerol biosynthesis I | -4.660401 | 0.0010937 |
| PWY-2942: L-lysine biosynthesis III | -4.698373 | 0.0044761 |
| P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I | -4.728063 | 0.0005894 |
| DAPLYSINESYN-PWY: L-lysine biosynthesis I | -4.817453 | 0.0000988 |
| PWY0-162: superpathway of pyrimidine ribonucleotides de novo biosynthesis | -5.006905 | 0.0077868 |
| PWY-5913: TCA cycle VI (obligate autotrophs) | -5.533614 | 0.0031467 |
Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.
blbe3 = pathways.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe3 = blbe3[blbe3['Blood_Saliva_euk_p'] < 0.01]
blbe3 = blbe3.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe3 = pd.concat([blbe3.head(n=50), blbe3.tail(n=50)])
blbe3 = blbe3.groupby(by=blbe3.index, axis=0).mean()
blbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UNMAPPED | 19.493011 | 0.0000000 |
| UNINTEGRATED | 17.939282 | 0.0000000 |
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | 6.110646 | 0.0007535 |
| PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I | 6.100552 | 0.0011918 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | 6.095096 | 0.0016145 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | 6.095096 | 0.0016145 |
| PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II | 5.979035 | 0.0012916 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | 5.877347 | 0.0006632 |
| PWY-7111: pyruvate fermentation to isobutanol (engineered) | 5.814683 | 0.0001827 |
| PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) | 5.791791 | 0.0025942 |
| PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) | 5.773506 | 0.0018236 |
| PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) | 5.757256 | 0.0024790 |
| TRNA-CHARGING-PWY: tRNA charging | 5.754887 | 0.0006745 |
| PWY-6700: queuosine biosynthesis | 5.734196 | 0.0005906 |
| PWY-3841: folate transformations II | 5.726477 | 0.0011950 |
| PWY-6385: peptidoglycan biosynthesis III (mycobacteria) | 5.714218 | 0.0022687 |
| PWY-1042: glycolysis IV (plant cytosol) | 5.701539 | 0.0023823 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | 5.671574 | 0.0004770 |
| VALSYN-PWY: L-valine biosynthesis | 5.671574 | 0.0004770 |
| PWY-5686: UMP biosynthesis | 5.646300 | 0.0020403 |
| ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) | 5.633261 | 0.0016283 |
| PWY-6151: S-adenosyl-L-methionine cycle I | 5.624810 | 0.0000442 |
| PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I | 5.572692 | 0.0006148 |
| PWY-6123: inosine-5’-phosphate biosynthesis I | 5.570520 | 0.0014188 |
| PWY66-400: glycolysis VI (metazoan) | 5.548027 | 0.0016291 |
| PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) | 5.546212 | 0.0011415 |
| PWY-6124: inosine-5’-phosphate biosynthesis II | 5.509068 | 0.0011130 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | 5.492656 | 0.0006684 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | 5.492486 | 0.0008237 |
| PWY-5973: cis-vaccenate biosynthesis | 5.444498 | 0.0001009 |
| GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) | 5.431052 | 0.0011567 |
| COA-PWY-1: coenzyme A biosynthesis II (mammalian) | 5.430403 | 0.0020649 |
| BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis | 5.419439 | 0.0012262 |
| PWY-5103: L-isoleucine biosynthesis III | 5.419439 | 0.0019216 |
| PWY-2942: L-lysine biosynthesis III | 5.392441 | 0.0019641 |
| PWY-7663: gondoate biosynthesis (anaerobic) | 5.381270 | 0.0000442 |
| PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I | 5.362463 | 0.0010197 |
| COA-PWY: coenzyme A biosynthesis I | 5.361069 | 0.0015540 |
| CALVIN-PWY: Calvin-Benson-Bassham cycle | 5.359116 | 0.0011099 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | 5.324635 | 0.0017712 |
| PWY-5097: L-lysine biosynthesis VI | 5.319435 | 0.0023547 |
| PWY-4242: pantothenate and coenzyme A biosynthesis III | 5.305762 | 0.0035591 |
| PWY-3001: superpathway of L-isoleucine biosynthesis I | 5.269120 | 0.0023617 |
| GLUCONEO-PWY: gluconeogenesis I | 5.202574 | 0.0006711 |
| ANAEROFRUCAT-PWY: homolactic fermentation | 5.142470 | 0.0018552 |
| PWY-7208: superpathway of pyrimidine nucleobases salvage | 5.137379 | 0.0034481 |
| THRESYN-PWY: superpathway of L-threonine biosynthesis | 5.117801 | 0.0032271 |
| HSERMETANA-PWY: L-methionine biosynthesis III | 5.061252 | 0.0030230 |
| PANTOSYN-PWY: pantothenate and coenzyme A biosynthesis I | 5.037823 | 0.0029191 |
| PWY-6936: seleno-amino acid biosynthesis | 5.015933 | 0.0002031 |
| ASPASN-PWY: superpathway of L-aspartate and L-asparagine biosynthesis | 5.004985 | 0.0008390 |
| PWY-7199: pyrimidine deoxyribonucleosides salvage | 4.999708 | 0.0057875 |
| PANTO-PWY: phosphopantothenate biosynthesis I | 4.998813 | 0.0045278 |
| FASYN-ELONG-PWY: fatty acid elongation – saturated | 4.897003 | 0.0000209 |
| PWY-6737: starch degradation V | 4.863308 | 0.0016062 |
| NONOXIPENT-PWY: pentose phosphate pathway (non-oxidative branch) | 4.859926 | 0.0007491 |
| PWY-7197: pyrimidine deoxyribonucleotide phosphorylation | 4.854472 | 0.0010486 |
| SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I | 4.850115 | 0.0014276 |
| PWY-7234: inosine-5’-phosphate biosynthesis III | 4.834209 | 0.0012272 |
| PWY-7664: oleate biosynthesis IV (anaerobic) | 4.833572 | 0.0000237 |
| PWY0-862: (5Z)-dodec-5-enoate biosynthesis | 4.753398 | 0.0000225 |
| PWY-6282: palmitoleate biosynthesis I (from (5Z)-dodec-5-enoate) | 4.750186 | 0.0000219 |
| PWY-3781: aerobic respiration I (cytochrome c) | 4.741331 | 0.0000165 |
| DAPLYSINESYN-PWY: L-lysine biosynthesis I | 4.563070 | 0.0011296 |
| PWY-5989: stearate biosynthesis II (bacteria and plants) | 4.543679 | 0.0000382 |
| PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV | 4.457328 | 0.0003134 |
| PWY66-422: D-galactose degradation V (Leloir pathway) | 4.362995 | 0.0019013 |
| HOMOSER-METSYN-PWY: L-methionine biosynthesis I | 4.328386 | 0.0003852 |
| GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) | 4.144839 | 0.0012492 |
| PENTOSE-P-PWY: pentose phosphate pathway | 4.018131 | 0.0014803 |
| TCA: TCA cycle I (prokaryotic) | 3.929039 | 0.0002437 |
| PWY-6317: galactose degradation I (Leloir pathway) | 3.887112 | 0.0075802 |
| PWY0-1479: tRNA processing | 3.874353 | 0.0007041 |
| PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type | 3.783430 | 0.0002497 |
| PWY-7279: aerobic respiration II (cytochrome c) (yeast) | 3.643022 | 0.0000058 |
| PWY-5913: TCA cycle VI (obligate autotrophs) | 3.351989 | 0.0006523 |
| HEMESYN2-PWY: heme biosynthesis II (anaerobic) | 3.208226 | 0.0003164 |
| GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff | 3.185600 | 0.0011206 |
| PWY0-1241: ADP-L-glycero-β-D-manno-heptose biosynthesis | 3.132300 | 0.0043060 |
| PWY-5173: superpathway of acetyl-CoA biosynthesis | 2.832510 | 0.0022145 |
| PWY-6803: phosphatidylcholine acyl editing | 2.676857 | 0.0000205 |
| SO4ASSIM-PWY: sulfate reduction I (assimilatory) | 2.247477 | 0.0014842 |
| GLUCOSE1PMETAB-PWY: glucose and glucose-1-phosphate degradation | 2.100950 | 0.0022532 |
| PWY-5505: L-glutamate and L-glutamine biosynthesis | 1.236371 | 0.0014493 |
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
blse3 = pathways.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse3 = blse3[blse3['Blood_Buccal_euk_p'] < 0.01]
blse3 = blse3.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse3 = pd.concat([blse3.head(n=50), blse.tail(n=50)])
blse3 = blse3.groupby(by=blse3.index, axis=0).mean()
blse3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UNMAPPED | 19.525104 | 0.0000000 |
| UNINTEGRATED | 17.899060 | 0.0000000 |
| PWY-7219: adenosine ribonucleotides de novo biosynthesis | 4.082921 | 0.0000073 |
| PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II | 3.944994 | 0.0000159 |
| PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II | 3.944994 | 0.0000159 |
| PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I | 3.490060 | 0.0001084 |
| PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II | 3.447454 | 0.0001879 |
| ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) | 3.297722 | 0.0000880 |
| VALSYN-PWY: L-valine biosynthesis | 3.297722 | 0.0000880 |
| PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I | 3.107286 | 0.0077383 |
| PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis | 3.100352 | 0.0051319 |
| PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II | 3.100352 | 0.0051319 |
| PWY-7221: guanosine ribonucleotides de novo biosynthesis | 3.096631 | 0.0028047 |
| PWY-6609: adenine and adenosine salvage III | 3.068784 | 0.0002464 |
| PWY66-400: glycolysis VI (metazoan) | 2.818712 | 0.0003861 |
| UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I | 2.659206 | 0.0045700 |
| PWY-6151: S-adenosyl-L-methionine cycle I | 2.537255 | 0.0016117 |
| PWY-6124: inosine-5’-phosphate biosynthesis II | 2.523542 | 0.0044726 |
| GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) | 2.276063 | 0.0021645 |
| PWY-5484: glycolysis II (from fructose 6-phosphate) | 1.884414 | 0.0032187 |
| SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I | 1.049353 | 0.0000346 |
| PWY-3781: aerobic respiration I (cytochrome c) | -3.487774 | 0.0023437 |
For each of these, I am initially filtering out to only P values that are below 0.01, and then am taking only the most up- or down-regulated gene families and printing them to a table.
For HUMAnN2 I am now looking at the gene families assigned to the Uniref50 database.
genes = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/genes_50.csv', header=0, index_col=0)
Negative fold changes are higher in saliva samples while positive are higher in blood samples.
bs = genes.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs = bs[bs['Saliva_Blood_p'] < 0.01]
bs = bs.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs = pd.concat([bs.head(n=50), bs.tail(n=50)])
bs.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_S9WFI9 | 6.629314 | 0.0001764 |
| UniRef50_H9FCW3: Mediator of DNA damage checkpoint protein 1 (Fragment) | 5.212867 | 0.0001696 |
| UniRef50_H9FCS1: Mediator of DNA damage checkpoint protein 1 (Fragment) | 5.185712 | 0.0002440 |
| UniRef50_Q1AN79: MHC class II antigen (Fragment) | 4.838595 | 0.0009648 |
| UniRef50_UPI00039927F1: PREDICTED: zinc finger protein 551-like | 4.685647 | 0.0071851 |
| UniRef50_L5KJU1: 60S ribosomal protein L4 | 4.583985 | 0.0003444 |
| UniRef50_X1FQ44: Marine sediment metagenome DNA, contig: S03H2_L09072 (Fragment) | 4.307323 | 0.0003196 |
| UniRef50_UPI00045E1407 | 4.156456 | 0.0085930 |
| UniRef50_Q29269: Orf protein (Fragment) | 4.114960 | 0.0010203 |
| UniRef50_V8PE67: Tyrosine-protein phosphatase non-receptor type 11 (Fragment) | 3.962689 | 0.0019819 |
| UniRef50_H2PP93 | 3.712350 | 0.0091976 |
| UniRef50_UPI0003ABAA48: PREDICTED: olfactory receptor 5F1-like | 3.710408 | 0.0097286 |
| UniRef50_Q1LZJ1: Arl4c protein (Fragment) | 3.627737 | 0.0050527 |
| UniRef50_M9WM38: Heat shock cognate 70 isoform 2 (Fragment) | 3.598336 | 0.0003457 |
| UniRef50_UPI0003AB5B64 | 3.551438 | 0.0043191 |
| UniRef50_F8LAC9 | 3.533322 | 0.0009125 |
| UniRef50_J9E765 | 3.377193 | 0.0000095 |
| UniRef50_UPI000387AFAD: PREDICTED: NADH-ubiquinone oxidoreductase chain 5-like isoform X1 | 3.339387 | 0.0012347 |
| UniRef50_UPI0002ADB57A | 3.313237 | 0.0067151 |
| UniRef50_I0EZ67: Mucin 3A (Fragment) | 3.306479 | 0.0096600 |
| UniRef50_UPI0003AB8CC4 | 3.305078 | 0.0036731 |
| UniRef50_UPI0003EB058D: PREDICTED: NADH-ubiquinone oxidoreductase chain 5-like isoform X2 | 3.281868 | 0.0038307 |
| UniRef50_UPI0003EB002A | 3.229292 | 0.0019300 |
| UniRef50_UPI0003EAFECD: PREDICTED: mucin-3A-like, partial | 3.210186 | 0.0055594 |
| UniRef50_A0A016SQN7 | 3.125134 | 0.0045341 |
| UniRef50_V4LU59 | 3.021396 | 0.0009296 |
| UniRef50_UPI00045DC74A: PREDICTED: mucin-3A-like | 2.962924 | 0.0042566 |
| UniRef50_UPI00045DA955 | 2.930116 | 0.0017526 |
| UniRef50_A0A024QYZ5: HCG1796935, isoform CRA_a | 2.913677 | 0.0041174 |
| UniRef50_R4GJT7 | 2.900382 | 0.0002836 |
| UniRef50_B4DF53: cDNA FLJ53551, highly similar to Zinc finger protein 248 | 2.897411 | 0.0000542 |
| UniRef50_B7P3Z5 | 2.889594 | 0.0019244 |
| UniRef50_Q5U4M3: LOC495467 protein (Fragment) | 2.850899 | 0.0031476 |
| UniRef50_M3ZCM1 | 2.828006 | 0.0012047 |
| UniRef50_UPI0003EB02DB: PREDICTED: homeobox protein aristaless-like | 2.823574 | 0.0022403 |
| UniRef50_A7RSV6: Predicted protein | 2.817247 | 0.0039070 |
| UniRef50_F6QNF7 | 2.800879 | 0.0069183 |
| UniRef50_UPI00046B6145: PREDICTED: putative double homeobox protein 2, partial | 2.763190 | 0.0002367 |
| UniRef50_UPI0003EAF13E: PREDICTED: LOW QUALITY PROTEIN: mucin-3A, partial | 2.751814 | 0.0054069 |
| UniRef50_M3ZBB0 | 2.728827 | 0.0010611 |
| UniRef50_UPI00045DF71E: PREDICTED: LOW QUALITY PROTEIN: double homeobox protein 4C-like | 2.725843 | 0.0019888 |
| UniRef50_UPI00020AF5A5: PREDICTED: double homeobox protein 4-like protein 4-like | 2.723618 | 0.0018227 |
| UniRef50_M3ZBZ2 | 2.703242 | 0.0009854 |
| UniRef50_W4HSC3 | 2.689705 | 0.0006538 |
| UniRef50_G7K0E8 | 2.687715 | 0.0000253 |
| UniRef50_C6GLY9 | 2.685302 | 0.0043274 |
| UniRef50_X1Q312: Marine sediment metagenome DNA, contig: S06H3_S28886 (Fragment) | 2.684738 | 0.0006639 |
| UniRef50_M3Z906 | 2.675682 | 0.0009384 |
| UniRef50_UPI0003EAE9FB: PREDICTED: double homeobox protein 4-like protein 4-like | 2.652845 | 0.0020827 |
| UniRef50_G3SIN0 | 2.643185 | 0.0023509 |
| UniRef50_D3A4C9 | -13.989167 | 0.0000005 |
| UniRef50_Q15662: Transformation-related protein (Fragment) | -14.023106 | 0.0000000 |
| UniRef50_R5BM51: Rubredoxin | -14.096789 | 0.0000004 |
| UniRef50_K0ZJH0 | -14.097575 | 0.0000000 |
| UniRef50_I1YR15: PF14128 domain protein | -14.149381 | 0.0000003 |
| UniRef50_A5TSU5 | -14.163944 | 0.0000028 |
| UniRef50_F0ET79 | -14.215695 | 0.0000093 |
| UniRef50_F2KZW6 | -14.258185 | 0.0000000 |
| UniRef50_D1BML4 | -14.260063 | 0.0000000 |
| UniRef50_F8WU95 | -14.261922 | 0.0001824 |
| UniRef50_V8M0W9 | -14.277070 | 0.0000359 |
| UniRef50_UPI0002743795 | -14.306087 | 0.0000000 |
| UniRef50_K4AUT0 | -14.342607 | 0.0000000 |
| UniRef50_D3I623 | -14.345769 | 0.0003207 |
| UniRef50_UPI0001D54BAB: PREDICTED: hypothetical protein LOC100423797 | -14.350340 | 0.0000000 |
| UniRef50_E6MSA0 | -14.415821 | 0.0000067 |
| UniRef50_D9RRX3 | -14.416132 | 0.0000000 |
| UniRef50_C7T763: Conserved protein | -14.437388 | 0.0000000 |
| UniRef50_F9EJV5 | -14.437395 | 0.0000112 |
| UniRef50_Q67JS5: 50S ribosomal protein L33 | -14.443151 | 0.0000002 |
| UniRef50_F8WU92 | -14.474993 | 0.0000125 |
| UniRef50_E3H1F1 | -14.493173 | 0.0002929 |
| UniRef50_B2DWT6 | -14.570674 | 0.0000000 |
| UniRef50_Q5R4Z9 | -14.596096 | 0.0000000 |
| UniRef50_D9RWB8 | -14.630906 | 0.0000004 |
| UniRef50_E7EQT9: C4b-binding protein beta chain | -14.693200 | 0.0000000 |
| UniRef50_T0V883: Peptide deformylase | -14.703710 | 0.0000000 |
| UniRef50_Q0SNB1: 50S ribosomal protein L33 | -14.703815 | 0.0000000 |
| UniRef50_F5XS62 | -14.882678 | 0.0000000 |
| UniRef50_D4CVQ8 | -14.890000 | 0.0000432 |
| UniRef50_R5FC77 | -14.930794 | 0.0002078 |
| UniRef50_J7SHX6 | -14.994454 | 0.0000000 |
| UniRef50_L1N274 | -15.056507 | 0.0000064 |
| UniRef50_Q5FHQ2: 50S ribosomal protein L34 | -15.113213 | 0.0000002 |
| UniRef50_P66235: 50S ribosomal protein L33 3 | -15.212730 | 0.0000000 |
| UniRef50_K3Z2X2 | -15.315930 | 0.0000018 |
| UniRef50_R6WM19 | -15.362005 | 0.0000000 |
| UniRef50_Q045W4: 50S ribosomal protein L33 1 | -15.445991 | 0.0000000 |
| UniRef50_Q8PJA2 | -15.464577 | 0.0000000 |
| UniRef50_Q9HC73-2: Isoform 2 of Cytokine receptor-like factor 2 | -15.472136 | 0.0000000 |
| UniRef50_A5TWX0 | -15.507820 | 0.0000080 |
| UniRef50_G6AI72 | -15.763796 | 0.0000001 |
| UniRef50_UPI00029DC723 | -15.788376 | 0.0000000 |
| UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) | -16.027757 | 0.0000000 |
| UniRef50_UPI00027F211C | -16.055269 | 0.0000000 |
| UniRef50_UPI0002ADA9E1 | -16.197589 | 0.0000000 |
| UniRef50_UPI000273E20F | -16.210068 | 0.0000000 |
| UniRef50_UPI00045D7F14 | -16.483538 | 0.0000000 |
| UniRef50_A4VYJ1 | -16.776385 | 0.0000000 |
| UniRef50_unknown | -18.595382 | 0.0010287 |
Negative fold changes are higher in buccal samples while positive are higher in blood samples.
bb = genes.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb = bb[bb['Buccal_Blood_p'] < 0.01]
bb= bb.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb = pd.concat([bb.head(n=50), bb.tail(n=50)])
bb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_E1GF92 | 5.743297 | 0.0005166 |
| UniRef50_Q1AN79: MHC class II antigen (Fragment) | 3.563088 | 0.0007322 |
| UniRef50_X1FQ44: Marine sediment metagenome DNA, contig: S03H2_L09072 (Fragment) | 3.336042 | 0.0002325 |
| UniRef50_L5KJU1: 60S ribosomal protein L4 | 3.084726 | 0.0031901 |
| UniRef50_F6W0F5 | 2.753011 | 0.0036775 |
| UniRef50_H2PP93 | 2.673160 | 0.0071481 |
| UniRef50_H9FCS1: Mediator of DNA damage checkpoint protein 1 (Fragment) | 2.524942 | 0.0077217 |
| UniRef50_F8LAC9 | 2.287875 | 0.0047304 |
| UniRef50_L5LZ23: Olfactory receptor 51G1 | 2.227058 | 0.0010465 |
| UniRef50_G3UWI9: SMT3 suppressor of mif two 3 homolog 3 (Yeast), isoform CRA_c | 2.072888 | 0.0097392 |
| UniRef50_F7HKI0: Peptidyl-prolyl cis-trans isomerase (Fragment) | 2.058932 | 0.0041232 |
| UniRef50_H0XH74 | 2.034486 | 0.0036756 |
| UniRef50_Q4Y9X3: Ferlin like protein, putative (Fragment) | 1.921880 | 0.0092333 |
| UniRef50_F7I766: Taste receptor type 2 | 1.849345 | 0.0066352 |
| UniRef50_M5FK34: Olfactory receptor | 1.831726 | 0.0027632 |
| UniRef50_F8L9X4 | 1.813557 | 0.0048592 |
| UniRef50_M9WM38: Heat shock cognate 70 isoform 2 (Fragment) | 1.787270 | 0.0037311 |
| UniRef50_UPI000387D4FF: PREDICTED: spermidine/spermine N(1)-acetyltransferase-like protein 1-like isoform X1 | 1.773936 | 0.0039887 |
| UniRef50_UPI0003EAEEA6: PREDICTED: spermidine/spermine N(1)-acetyltransferase-like protein 1-like isoform X1 | 1.731149 | 0.0044790 |
| UniRef50_UPI00046B9C67: PREDICTED: zinc finger protein 239-like | 1.706392 | 0.0052273 |
| UniRef50_H2PLR3 | 1.690212 | 0.0011983 |
| UniRef50_UPI0003AB5B64 | 1.687717 | 0.0092839 |
| UniRef50_Q646G0: Taste receptor type 2 member 46 | 1.658515 | 0.0025296 |
| UniRef50_UPI00045DA955 | 1.657828 | 0.0015369 |
| UniRef50_P06310: Ig kappa chain V-II region RPMI 6410 | 1.653949 | 0.0039865 |
| UniRef50_J3QQQ9: KRAB-A domain-containing protein 2 | 1.652828 | 0.0012095 |
| UniRef50_G3RNZ9 | 1.643521 | 0.0036612 |
| UniRef50_Q8WMI5: Protocadherin beta 3’ (Fragment) | 1.639896 | 0.0042772 |
| UniRef50_H9K9F5 | 1.586861 | 0.0066564 |
| UniRef50_B4DF53: cDNA FLJ53551, highly similar to Zinc finger protein 248 | 1.577706 | 0.0020450 |
| UniRef50_F8W7C6: 60S ribosomal protein L10 | 1.573421 | 0.0026586 |
| UniRef50_UPI00046B6145: PREDICTED: putative double homeobox protein 2, partial | 1.544664 | 0.0084687 |
| UniRef50_L9L4H1: High mobility group protein B1 | 1.529451 | 0.0014162 |
| UniRef50_G5AQ84: Olfactory receptor | 1.493635 | 0.0085284 |
| UniRef50_F7A8K2 | 1.476652 | 0.0065107 |
| UniRef50_P56545: C-terminal-binding protein 2 | 1.442803 | 0.0002386 |
| UniRef50_Q2VAZ9: Spd4 | 1.431028 | 0.0091354 |
| UniRef50_Q91YZ2: C-terminal binding protein 2, isoform CRA_b | 1.427224 | 0.0004516 |
| UniRef50_G3QH92 | 1.414881 | 0.0018520 |
| UniRef50_UPI0003EAF13E: PREDICTED: LOW QUALITY PROTEIN: mucin-3A, partial | 1.409414 | 0.0086321 |
| UniRef50_I7GIU3: Macaca fascicularis brain cDNA clone: QflA-22647, similar to human DAZ interacting protein 1 (DZIP1), mRNA, RefSeq: NM_198968.1 | 1.404076 | 0.0051347 |
| UniRef50_UPI0003EB002A | 1.401988 | 0.0001113 |
| UniRef50_UPI00045D9A97: PREDICTED: protocadherin beta-17 | 1.392696 | 0.0056254 |
| UniRef50_P27635: 60S ribosomal protein L10 | 1.389956 | 0.0063634 |
| UniRef50_C3Z1N2 | 1.381111 | 0.0083114 |
| UniRef50_Q95M20: Envelope glycoprotein (Fragment) | 1.366104 | 0.0002640 |
| UniRef50_UPI000387AFAD: PREDICTED: NADH-ubiquinone oxidoreductase chain 5-like isoform X1 | 1.343646 | 0.0000548 |
| UniRef50_UPI0003AB8CC4 | 1.315363 | 0.0002565 |
| UniRef50_G0WMZ3: Histone H3 (Fragment) | 1.300715 | 0.0033259 |
| UniRef50_L7MA99: Putative integrin beta-like 1 (Fragment) | 1.262451 | 0.0023312 |
| UniRef50_P65239: Ribose-phosphate pyrophosphokinase 1 | -12.532742 | 0.0000233 |
| UniRef50_B5E2F8 | -12.542684 | 0.0000179 |
| UniRef50_G0IBT1: Integral membrane protein | -12.547501 | 0.0000001 |
| UniRef50_A3CRC4 | -12.567026 | 0.0000885 |
| UniRef50_Q74IL5: 50S ribosomal protein L27 | -12.606708 | 0.0000154 |
| UniRef50_Q8CZ89: UPF0297 protein spr0175 | -12.610265 | 0.0002538 |
| UniRef50_B2IN20 | -12.614462 | 0.0000135 |
| UniRef50_P66854: Single-stranded DNA-binding protein | -12.626927 | 0.0001714 |
| UniRef50_G0IAJ9 | -12.754517 | 0.0000488 |
| UniRef50_A8AXP8: 30S ribosomal protein S21 | -12.781951 | 0.0000017 |
| UniRef50_Q49UP7 | -12.789827 | 0.0000005 |
| UniRef50_T1Z2R4: Acyl carrier protein | -12.793585 | 0.0001978 |
| UniRef50_B1SBJ3 | -12.839826 | 0.0000085 |
| UniRef50_Q03IY7: Transposase | -12.868076 | 0.0005401 |
| UniRef50_P31306: Oligopeptide-binding protein SarA | -12.869272 | 0.0000136 |
| UniRef50_B1I837: UPF0356 protein SPH_0238 | -12.870009 | 0.0000048 |
| UniRef50_P64405 | -12.870288 | 0.0000168 |
| UniRef50_Q18CG9: 50S ribosomal protein L29 | -12.873797 | 0.0000004 |
| UniRef50_A4W3V5 | -12.876910 | 0.0000098 |
| UniRef50_A0A023VL47 | -12.882949 | 0.0000243 |
| UniRef50_Q6ZPA4: CDNA FLJ26171 fis, clone ADG03656 | -12.907779 | 0.0000000 |
| UniRef50_T0V883: Peptide deformylase | -12.939411 | 0.0000001 |
| UniRef50_M5MR97 | -13.022733 | 0.0000019 |
| UniRef50_M7MB60: Transcriptional regulator ComX1 | -13.027114 | 0.0002164 |
| UniRef50_Q035A1: 50S ribosomal protein L30 | -13.029991 | 0.0000105 |
| UniRef50_D6ZNE8 | -13.066363 | 0.0000146 |
| UniRef50_Q9NWI4: CDNA FLJ20837 fis, clone ADKA02602 | -13.183296 | 0.0000000 |
| UniRef50_A7HM29: Translation initiation factor IF-1 | -13.221044 | 0.0001248 |
| UniRef50_Q0TMQ9: 30S ribosomal protein S14 type Z | -13.241974 | 0.0000244 |
| UniRef50_UPI000387C557: PREDICTED: transmembrane protein 14C isoform X1 | -13.307812 | 0.0000000 |
| UniRef50_B5XL02 | -13.315332 | 0.0000014 |
| UniRef50_M4KYC9 | -13.328328 | 0.0000200 |
| UniRef50_Q88WK6: 50S ribosomal protein L28 | -13.395329 | 0.0000037 |
| UniRef50_A4VUC9 | -13.546590 | 0.0000081 |
| UniRef50_J7SHX6 | -13.774661 | 0.0005302 |
| UniRef50_H7QSD9 | -13.806682 | 0.0009866 |
| UniRef50_Q8NHA8: Olfactory receptor 1F12 | -13.875289 | 0.0000000 |
| UniRef50_UPI0002BB9C7E: hypothetical protein, partial | -13.983423 | 0.0000037 |
| UniRef50_UPI00027F40D7: PREDICTED: zinc finger protein DPF3-like | -14.225737 | 0.0000000 |
| UniRef50_K4AUT0 | -14.330889 | 0.0000028 |
| UniRef50_M1YD77 | -14.345829 | 0.0006540 |
| UniRef50_K0ZJH0 | -14.646890 | 0.0000000 |
| UniRef50_UPI0001D54BAB: PREDICTED: hypothetical protein LOC100423797 | -14.710495 | 0.0000000 |
| UniRef50_A4VYJ1 | -16.099654 | 0.0007280 |
| UniRef50_unknown | -16.342427 | 0.0025047 |
| UniRef50_UPI00027F211C | -16.835101 | 0.0000000 |
| UniRef50_Q8PJA2 | -17.042285 | 0.0000000 |
| UniRef50_UPI00045D7F14 | -17.107391 | 0.0000000 |
| UniRef50_B2DWT6 | -17.108883 | 0.0000089 |
| UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) | -17.933310 | 0.0000000 |
Negative fold changes are higher in saliva samples while positive are higher in buccal samples.
sb = genes.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb = sb[sb['Saliva_Buccal_p'] < 0.01]
sb= sb.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb = pd.concat([sb.head(n=50), sb.tail(n=50)])
sb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_F2UXX5 | 7.315803 | 0.0099579 |
| UniRef50_E2ANL9: Calcium-transporting ATPase sarcoplasmic/endoplasmic reticulum type (Fragment) | 6.908305 | 0.0000913 |
| UniRef50_L8ECE5: Alternative protein CELSR1 | 6.343497 | 0.0004922 |
| UniRef50_UPI0000D9EED1: PREDICTED: intermediate conductance calcium-activated potassium channel protein 4-like | 5.954222 | 0.0002641 |
| UniRef50_B2GFL3: Thiamine-monophosphate kinase | 5.914369 | 0.0079731 |
| UniRef50_F9Q0X0: DNA polymerase III, beta subunit | 5.903499 | 0.0043347 |
| UniRef50_E8JV65 | 5.769518 | 0.0009157 |
| UniRef50_Q28933: Insulin-like growth factor I (Fragment) | 5.514441 | 0.0001828 |
| UniRef50_R0P4C8: ABC transporter ATP-binding protein | 5.499301 | 0.0001294 |
| UniRef50_Q9NPP4-4: Isoform 4 of NLR family CARD domain-containing protein 4 | 5.462684 | 0.0005657 |
| UniRef50_W5L103 | 5.415375 | 0.0004958 |
| UniRef50_F0I1V1 | 5.405717 | 0.0041094 |
| UniRef50_Q9H521 | 5.405141 | 0.0011870 |
| UniRef50_A2D556: HDAC9 (Fragment) | 5.277441 | 0.0008590 |
| UniRef50_Q4T0Y0: Chromosome undetermined SCAF10794, whole genome shotgun sequence. (Fragment) | 5.265559 | 0.0005947 |
| UniRef50_R0N8S7: GTP cyclohydrolase I | 5.158222 | 0.0008652 |
| UniRef50_F7H3X2 | 4.861689 | 0.0011155 |
| UniRef50_F9PM29: Peptidyl-tRNA hydrolase PTH2 | 4.835777 | 0.0066279 |
| UniRef50_F2V0X4 | 4.817801 | 0.0025800 |
| UniRef50_F8VU50: Cytochrome P450 4F12 (Fragment) | 4.721608 | 0.0026254 |
| UniRef50_O18892: Neuroendocrine-specific protein (Fragment) | 4.628135 | 0.0007919 |
| UniRef50_S7MTM3: Insulin growth factor-like family member 3 | 4.571237 | 0.0001902 |
| UniRef50_M5BF77: 5-hydroxytryptamine (Serotonin) receptor 7a (Fragment) | 4.436518 | 0.0001619 |
| UniRef50_A7S0H5: Predicted protein (Fragment) | 4.397030 | 0.0046533 |
| UniRef50_Q6P1D3: Kif3b protein | 4.377647 | 0.0013682 |
| UniRef50_G9L0C0: Cyclin-dependent kinase 2-associated protein 2-like protein (Fragment) | 4.329003 | 0.0097144 |
| UniRef50_UPI00039402E6: PREDICTED: complement C1q tumor necrosis factor-related protein 1-like | 4.314720 | 0.0012100 |
| UniRef50_E3H3Y7 | 4.314125 | 0.0077289 |
| UniRef50_H0YN86: DNA-binding protein SATB1 | 4.215396 | 0.0000863 |
| UniRef50_E0PRL0 | 4.175238 | 0.0063785 |
| UniRef50_G3V3U3: Gamma-aminobutyric acid receptor subunit beta-3 | 4.158766 | 0.0017780 |
| UniRef50_M1EMH1: Differentially expressed in FDCP 8-like protein (Fragment) | 4.137598 | 0.0034429 |
| UniRef50_UPI00046B4812: PREDICTED: voltage-dependent calcium channel gamma-5 subunit-like | 4.126605 | 0.0025449 |
| UniRef50_F3PD35: Conserved domain protein | 4.098196 | 0.0079865 |
| UniRef50_E3GZW8 | 3.986665 | 0.0059652 |
| UniRef50_P16952: Agglutinin receptor | 3.960138 | 0.0012740 |
| UniRef50_L9K3Q1: 40S ribosomal protein S7 | 3.925397 | 0.0098665 |
| UniRef50_J0WNZ4: PF11377 family protein | 3.902408 | 0.0088429 |
| UniRef50_F2UXV1 | 3.898271 | 0.0035723 |
| UniRef50_E3H5T2 | 3.871498 | 0.0099899 |
| UniRef50_Q6ZWH7: cDNA FLJ41054 fis, clone STOMA1000189 | 3.866233 | 0.0053689 |
| UniRef50_F2QAI7: Cytochrome b (Fragment) | 3.846908 | 0.0038612 |
| UniRef50_E9PSF1: RNA-binding protein 38 | 3.823172 | 0.0037154 |
| UniRef50_UPI0003ACC708: PREDICTED: glutamine-rich protein 2-like | 3.821570 | 0.0058664 |
| UniRef50_L5KK24 | 3.813490 | 0.0031550 |
| UniRef50_L8E857: Alternative protein ZC3H13 | 3.799492 | 0.0089465 |
| UniRef50_UPI0003C12040: PREDICTED: zinc finger protein 280B-like | 3.735111 | 0.0080621 |
| UniRef50_A8AWT9 | 3.702641 | 0.0086479 |
| UniRef50_B3GS42: Adrenergic receptor alpha-2A (Fragment) | 3.678312 | 0.0008552 |
| UniRef50_I3LCW0 | 3.673261 | 0.0083240 |
| UniRef50_D3A6F4 | -12.353034 | 0.0001144 |
| UniRef50_E6KT76 | -12.357022 | 0.0025087 |
| UniRef50_A0A015Z1A5 | -12.366172 | 0.0085094 |
| UniRef50_C0EPA9 | -12.413849 | 0.0000022 |
| UniRef50_A7FPL6: 50S ribosomal protein L34 | -12.451899 | 0.0000017 |
| UniRef50_C5TLN7 | -12.492172 | 0.0000091 |
| UniRef50_I1YX70 | -12.524738 | 0.0000131 |
| UniRef50_J1LR62 | -12.536216 | 0.0000000 |
| UniRef50_F9DLQ2 | -12.544585 | 0.0019333 |
| UniRef50_F8WUF4 | -12.556291 | 0.0002143 |
| UniRef50_L9PWG5 | -12.580060 | 0.0010930 |
| UniRef50_I1YT36: Protein translocase subunit SecE | -12.606614 | 0.0035617 |
| UniRef50_I1ZK53 | -12.632691 | 0.0000000 |
| UniRef50_U3QCK3 | -12.682848 | 0.0046113 |
| UniRef50_F9DEH1 | -12.685854 | 0.0000004 |
| UniRef50_R5KBV9 | -12.687679 | 0.0005809 |
| UniRef50_D1QVL3 | -12.709784 | 0.0000003 |
| UniRef50_E6MPR7 | -12.718797 | 0.0000013 |
| UniRef50_F0F573 | -12.724685 | 0.0069584 |
| UniRef50_R5ZUD4 | -12.735028 | 0.0047915 |
| UniRef50_Q8R9W1: Acyl carrier protein | -12.788626 | 0.0013549 |
| UniRef50_D3A777 | -12.820328 | 0.0000004 |
| UniRef50_D3A200 | -12.868877 | 0.0000001 |
| UniRef50_D4TYA4 | -12.871035 | 0.0068930 |
| UniRef50_L1PXF5 | -12.911877 | 0.0022917 |
| UniRef50_D2ZXW7 | -13.000393 | 0.0006094 |
| UniRef50_E3H7K8: Protein translocase subunit SecE | -13.004207 | 0.0076877 |
| UniRef50_U5Q4G7 | -13.040389 | 0.0011199 |
| UniRef50_D3A4M7 | -13.058006 | 0.0000000 |
| UniRef50_D1QU93 | -13.129398 | 0.0000007 |
| UniRef50_G1VIN1 | -13.180683 | 0.0000012 |
| UniRef50_C9MRL3 | -13.192778 | 0.0020007 |
| UniRef50_R6EH14 | -13.327954 | 0.0000030 |
| UniRef50_D9RWE8 | -13.350595 | 0.0000003 |
| UniRef50_A7B8U2 | -13.435091 | 0.0002265 |
| UniRef50_J2ZDR2 | -13.467517 | 0.0000000 |
| UniRef50_D4CWL6 | -13.473774 | 0.0041463 |
| UniRef50_F9DEP4 | -13.474756 | 0.0000000 |
| UniRef50_F9DFJ4 | -13.505607 | 0.0000008 |
| UniRef50_E7RTC7 | -13.520921 | 0.0000141 |
| UniRef50_E1L0U7 | -13.531869 | 0.0008408 |
| UniRef50_C3WV96 | -13.802692 | 0.0000199 |
| UniRef50_D3A4C9 | -13.989167 | 0.0000005 |
| UniRef50_F0ET79 | -14.215695 | 0.0005314 |
| UniRef50_F2KZW6 | -14.258185 | 0.0000000 |
| UniRef50_F8WU95 | -14.261922 | 0.0046479 |
| UniRef50_UPI0002743795 | -14.306087 | 0.0000000 |
| UniRef50_Q67JS5: 50S ribosomal protein L33 | -14.443151 | 0.0060342 |
| UniRef50_L1N274 | -15.056507 | 0.0000064 |
| UniRef50_Q5FHQ2: 50S ribosomal protein L34 | -15.113213 | 0.0018994 |
Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.
ss = genes.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss = ss[ss['Saliva_Saliva_euk_p'] < 0.01]
ss = ss.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss = pd.concat([ss.head(n=50), ss.tail(n=50)])
ss.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_Q2EG36: Zonadhesin variant 6 (Fragment) | 6.736133 | 0.0003704 |
| UniRef50_E0CZ07: Mucolipin-1 (Fragment) | 6.325679 | 0.0059547 |
| UniRef50_G3IFH3: IQ and AAA domain-containing protein 1 | 5.615730 | 0.0066785 |
| UniRef50_J3QRX4: Coordinator of PRMT5 and differentiation stimulator | 5.404396 | 0.0099960 |
| UniRef50_G9KLG5: Ring finger protein 183 (Fragment) | 5.299399 | 0.0033525 |
| UniRef50_M3WRY0 | 5.271822 | 0.0032388 |
| UniRef50_J0LF75: CDP-alcohol phosphatidyltransferase | 4.973907 | 0.0072992 |
| UniRef50_J4SN93 | 4.912408 | 0.0064539 |
| UniRef50_UPI0002A4C9E1: PREDICTED: serine/arginine-rich splicing factor 9-like isoform 2 | 4.853556 | 0.0038099 |
| UniRef50_X6DTB7 | 4.724411 | 0.0014521 |
| UniRef50_D2I8R3 | 4.415273 | 0.0034481 |
| UniRef50_Q4RB69: Chromosome undetermined SCAF22150, whole genome shotgun sequence. (Fragment) | 4.362065 | 0.0036359 |
| UniRef50_W5NRZ1 | 4.306406 | 0.0047186 |
| UniRef50_Q9NYJ1: Cytochrome c oxidase assembly factor 4 homolog, mitochondrial | 4.273274 | 0.0011101 |
| UniRef50_UPI000333A01E: PREDICTED: cytochrome c oxidase assembly factor 4 homolog, mitochondrial-like | 4.149633 | 0.0007600 |
| UniRef50_Q01BQ0: WGS project CAID00000000 data, contig chromosome 03 | 3.937558 | 0.0070409 |
| UniRef50_C6SFZ1: Glycine dehydrogenase | 3.760227 | 0.0086859 |
| UniRef50_F0GZ41: Putative neutral zinc metallopeptidase | 3.714295 | 0.0038094 |
| UniRef50_F7CTS8 | 3.680199 | 0.0021871 |
| UniRef50_E2B2T0 | 3.645847 | 0.0076141 |
| UniRef50_K9KGU3: Zinc finger E-box-binding homeobox 2-like protein (Fragment) | 3.589550 | 0.0045042 |
| UniRef50_UPI000273E760: PREDICTED: insulin receptor-like | 3.543139 | 0.0026163 |
| UniRef50_UPI000441E382: PREDICTED: protein shisa-2 homolog | 3.542102 | 0.0064692 |
| UniRef50_UPI0003EAEF90: PREDICTED: zinc finger protein 77 isoform X1 | 3.530398 | 0.0085635 |
| UniRef50_UPI00042AEC10: PREDICTED: protein shisa-2 homolog, partial | 3.528187 | 0.0077543 |
| UniRef50_Q4X8J5 | 3.087042 | 0.0096098 |
| UniRef50_W5KCI9 | 3.006566 | 0.0070717 |
| UniRef50_C4J898 | 2.993080 | 0.0030798 |
| UniRef50_Q5U4M3: LOC495467 protein (Fragment) | 2.850899 | 0.0031476 |
| UniRef50_A7RSV6: Predicted protein | 2.817247 | 0.0049236 |
| UniRef50_U6DD35: FZD2 protein (Fragment) | 2.808793 | 0.0012733 |
| UniRef50_H3C2D3 | 2.743496 | 0.0029913 |
| UniRef50_W7F324: 40S ribosomal protein S16 | 2.687886 | 0.0008769 |
| UniRef50_A0A024R088: HCG1995470, isoform CRA_a (Fragment) | 2.638056 | 0.0025685 |
| UniRef50_H2RDM6 | 2.624960 | 0.0085736 |
| UniRef50_G3IHF2: Putative proline-rich protein 21 | 2.519286 | 0.0059569 |
| UniRef50_E2B5E4 | 2.480695 | 0.0068582 |
| UniRef50_K0G5V9: Ribosomal protein L31-like protein | 2.417748 | 0.0015082 |
| UniRef50_H2L3C4 | 2.322147 | 0.0036513 |
| UniRef50_R4G9V3 | 2.298731 | 0.0001725 |
| UniRef50_C6GLY9 | 2.287017 | 0.0061704 |
| UniRef50_UPI0003D080C3: PREDICTED: interferon alpha-1-like | 2.184722 | 0.0053331 |
| UniRef50_UPI00044221BE: PREDICTED: ribosome-binding protein 1-like | 2.164684 | 0.0030225 |
| UniRef50_H6TPI0: Translation elongation factor 1-alpha (Fragment) | 2.079082 | 0.0021020 |
| UniRef50_Q8PJA2 | 2.060768 | 0.0053642 |
| UniRef50_UPI0003D0B7A4: PREDICTED: zinc finger protein 629-like | 2.060686 | 0.0051971 |
| UniRef50_UPI00046B6145: PREDICTED: putative double homeobox protein 2, partial | 2.059583 | 0.0001653 |
| UniRef50_Q9H492: Microtubule-associated proteins 1A/1B light chain 3A | 2.039716 | 0.0092642 |
| UniRef50_E5T9Q2 | 2.018052 | 0.0089603 |
| UniRef50_UPI0003C26DC2: PREDICTED: zinc finger protein 436-like | 2.010972 | 0.0056649 |
| UniRef50_D4TZD9 | -10.002404 | 0.0009972 |
| UniRef50_K0YV53 | -10.008045 | 0.0026087 |
| UniRef50_E6MPZ2 | -10.022918 | 0.0053824 |
| UniRef50_J1BCC5: Putative lipoprotein | -10.049852 | 0.0067986 |
| UniRef50_C6R3D9 | -10.055446 | 0.0044989 |
| UniRef50_W1WGH3 | -10.078205 | 0.0047290 |
| UniRef50_L9PX91 | -10.088781 | 0.0001431 |
| UniRef50_D6LCM2 | -10.095292 | 0.0006760 |
| UniRef50_M5ICP3 | -10.101189 | 0.0017222 |
| UniRef50_E6MQB8 | -10.102489 | 0.0012765 |
| UniRef50_E6MLR7 | -10.105446 | 0.0000378 |
| UniRef50_E6MPQ7 | -10.135183 | 0.0074584 |
| UniRef50_M5LV42 | -10.177302 | 0.0016998 |
| UniRef50_T0V181 | -10.194074 | 0.0053965 |
| UniRef50_E6MP39 | -10.210365 | 0.0000731 |
| UniRef50_S2ZVU4 | -10.229454 | 0.0081521 |
| UniRef50_D4U2D1 | -10.387840 | 0.0037682 |
| UniRef50_I0T894 | -10.391620 | 0.0000002 |
| UniRef50_D1QS27 | -10.443088 | 0.0000015 |
| UniRef50_J0SXJ1 | -10.455402 | 0.0095049 |
| UniRef50_G6QX35: Lysyl-tRNA synthetase | -10.465026 | 0.0010429 |
| UniRef50_C2KZM8 | -10.502725 | 0.0006551 |
| UniRef50_Q9HTK8: Rubredoxin-2 | -10.528115 | 0.0097021 |
| UniRef50_J1B8M1 | -10.540917 | 0.0023392 |
| UniRef50_E1M6H0: L-asparaginase | -10.571133 | 0.0028062 |
| UniRef50_K2CG05 | -10.655385 | 0.0000000 |
| UniRef50_E6MML7 | -10.787475 | 0.0041259 |
| UniRef50_A4VUC9 | -10.797531 | 0.0000027 |
| UniRef50_E6MPU9 | -10.813932 | 0.0000011 |
| UniRef50_C6SPQ1 | -10.875661 | 0.0007018 |
| UniRef50_C6R611 | -10.909512 | 0.0010026 |
| UniRef50_G6ADU9 | -11.040957 | 0.0069180 |
| UniRef50_A7ZFG7 | -11.063752 | 0.0017442 |
| UniRef50_E6ML90 | -11.289791 | 0.0000608 |
| UniRef50_E8SPK9 | -11.291495 | 0.0000004 |
| UniRef50_U2L5W8 | -11.370382 | 0.0035340 |
| UniRef50_G0I7S2 | -11.400386 | 0.0019163 |
| UniRef50_E6MRQ0 | -11.409751 | 0.0094672 |
| UniRef50_E2ZDE4 | -11.421551 | 0.0000102 |
| UniRef50_E6MT18 | -11.452743 | 0.0000023 |
| UniRef50_F2CNP3 | -11.531533 | 0.0000004 |
| UniRef50_E6MTN0 | -11.619507 | 0.0000024 |
| UniRef50_J0SWN8 | -11.709602 | 0.0000015 |
| UniRef50_E6MRC7 | -11.800971 | 0.0000019 |
| UniRef50_J1BCP8 | -11.883256 | 0.0000208 |
| UniRef50_E6MN88 | -12.108664 | 0.0000075 |
| UniRef50_M0R3A6: Fms-related tyrosine kinase 3 ligand (Fragment) | -12.187634 | 0.0000000 |
| UniRef50_E6KT76 | -12.357022 | 0.0000047 |
| UniRef50_B1IBD3: 50S ribosomal protein L33 1 | -13.286300 | 0.0095733 |
| UniRef50_E7RTC7 | -13.520921 | 0.0000141 |
Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.
bbe = genes.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe = bbe[bbe['Buccal_Buccal_euk_p'] < 0.01]
bbe = bbe.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe = pd.concat([bbe.head(n=50), bbe.tail(n=50)])
bbe = bbe.groupby(by=bbe.index, axis=0).mean()
bbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_UPI0003D0E142: PREDICTED: exportin-6-like isoform X1 | 8.116484 | 0.0008299 |
| UniRef50_F8WDA5: Nitrogen permease regulator 3-like protein | 7.930798 | 0.0000000 |
| UniRef50_D1M744: Gamma-aminobutyric acid A receptor beta 2 (Fragment) | 7.386372 | 0.0045171 |
| UniRef50_F8W7P5: Protein FAM92A1 | 6.842389 | 0.0025446 |
| UniRef50_UPI00045DE035: PREDICTED: cadherin-10, partial | 6.782436 | 0.0010449 |
| UniRef50_C3ZX87 | 6.695182 | 0.0000021 |
| UniRef50_L8HKY8 | 6.579498 | 0.0058675 |
| UniRef50_F6T035 | 6.358616 | 0.0000631 |
| UniRef50_H0YES9: ATP-sensitive inward rectifier potassium channel 11 (Fragment) | 6.290137 | 0.0019962 |
| UniRef50_W1W7N6: Formate acetyltransferase | 5.909258 | 0.0020171 |
| UniRef50_B3PMQ8: Hypothetical lipoprotein | 5.692805 | 0.0000004 |
| UniRef50_G7P543 | 5.658893 | 0.0090239 |
| UniRef50_S9W349: Ribosomal protein L10 isoform 4-like protein | 5.391158 | 0.0036590 |
| UniRef50_C4J8P5 | 5.167383 | 0.0049123 |
| UniRef50_UPI00042AD1E7: PREDICTED: disintegrin and metalloproteinase domain-containing protein 20-like, partial | 5.076077 | 0.0078819 |
| UniRef50_S4HKA8: LPXTG-motif protein cell wall anchor domain protein (Fragment) | 4.634721 | 0.0000239 |
| UniRef50_Q8N0Z0: Seven transmembrane helix receptor | 4.523008 | 0.0022326 |
| UniRef50_A0A016UU23 | 3.369483 | 0.0024479 |
| UniRef50_F6WD18 | 2.869978 | 0.0024296 |
| UniRef50_E2BJ09 | 2.808241 | 0.0060241 |
| UniRef50_F8WAS3: NADH dehydrogenase [ubiquinone] 1 alpha subcomplex subunit 5 | 2.519171 | 0.0003033 |
| UniRef50_H2R246 | 2.371328 | 0.0013049 |
| UniRef50_U3JCC1 | 2.362967 | 0.0023879 |
| UniRef50_UPI00020E5039: PREDICTED: golgin subfamily A member 8A-like | 2.171549 | 0.0075876 |
| UniRef50_UPI0002038DEF: PREDICTED: zinc finger protein 84-like | 2.149222 | 0.0062307 |
| UniRef50_D3ZHE0 | 2.046962 | 0.0036935 |
| UniRef50_G5B3Z4: Olfactory receptor 8H1 | 2.009480 | 0.0084992 |
| UniRef50_D1KFN6: Olfactory receptor (Fragment) | 1.993650 | 0.0022319 |
| UniRef50_Q32Q92-2: Isoform 2 of Acyl-coenzyme A thioesterase 6 | 1.969015 | 0.0080379 |
| UniRef50_L9JBM4: Olfactory receptor | 1.892725 | 0.0033091 |
| UniRef50_I7G5N0: Macaca fascicularis brain cDNA, clone: QflA-18143 | 1.889867 | 0.0029983 |
| UniRef50_UPI000387D707: PREDICTED: immunoglobulin superfamily member 3-like | 1.873711 | 0.0008035 |
| UniRef50_UPI00042C6F73: PREDICTED: immunoglobulin superfamily member 3-like, partial | 1.783043 | 0.0002690 |
| UniRef50_UPI0004542214: PREDICTED: zinc finger protein 239-like, partial | 1.761047 | 0.0090689 |
| UniRef50_M0R1E6 | 1.726397 | 0.0071132 |
| UniRef50_UPI00046B9C67: PREDICTED: zinc finger protein 239-like | 1.706392 | 0.0085283 |
| UniRef50_L5KJS6: Polycystin-1 (Fragment) | 1.692470 | 0.0064700 |
| UniRef50_G5C4N0: ATP synthase lipid-binding protein, mitochondrial | 1.682890 | 0.0056619 |
| UniRef50_L5LHR9: Eukaryotic initiation factor 4A-I | 1.613836 | 0.0026058 |
| UniRef50_UPI0003942425: PREDICTED: 60S ribosomal protein L13a-like | 1.592437 | 0.0099378 |
| UniRef50_F7FDF6 | 1.592109 | 0.0056062 |
| UniRef50_G7Q0J4 | 1.588236 | 0.0096193 |
| UniRef50_L5MIL4: Eukaryotic initiation factor 4A-I | 1.569020 | 0.0079874 |
| UniRef50_P15105: Glutamine synthetase | 1.563012 | 0.0014998 |
| UniRef50_M3WYY8 | 1.528468 | 0.0031577 |
| UniRef50_L9KZ86: Polycystin-1 | 1.508605 | 0.0090114 |
| UniRef50_UPI00042BD796: PREDICTED: zinc finger protein 850-like | 1.489909 | 0.0035273 |
| UniRef50_Q6DET9: Elongation factor 1-beta | 1.436267 | 0.0053110 |
| UniRef50_Q8VGX7: Olfactory receptor | 1.407683 | 0.0073447 |
| UniRef50_M7AMJ2 | 1.406419 | 0.0088378 |
| UniRef50_D2NR32: Predicted phosphatase/phosphohexomutase | -9.055643 | 0.0053412 |
| UniRef50_E4XC66: Whole genome shotgun assembly, allelic scaffold set, scaffold scaffoldA_459 | -9.065122 | 0.0047685 |
| UniRef50_K0JQ32 | -9.092686 | 0.0000009 |
| UniRef50_E5RIU9: Charged multivesicular body protein 7 (Fragment) | -9.108811 | 0.0000000 |
| UniRef50_H6PBP7: TfoX N-terminal domain family protein | -9.147272 | 0.0000176 |
| UniRef50_Q8G526: 4-hydroxy-tetrahydrodipicolinate reductase | -9.174536 | 0.0031181 |
| UniRef50_Q82MT8: 50S ribosomal protein L31 type B 1 | -9.175704 | 0.0030548 |
| UniRef50_D8UNZ7 | -9.192506 | 0.0000083 |
| UniRef50_S4WAA0 | -9.192524 | 0.0089845 |
| UniRef50_P37079: Sorbitol-6-phosphate 2-dehydrogenase | -9.207171 | 0.0096250 |
| UniRef50_E3GZV7 | -9.210175 | 0.0000132 |
| UniRef50_C5C0V6: Exodeoxyribonuclease 7 small subunit | -9.213471 | 0.0000401 |
| UniRef50_U1R6U4 | -9.307144 | 0.0000542 |
| UniRef50_C0MD90 | -9.326540 | 0.0000410 |
| UniRef50_D2NPD9: Type IV secretory pathway, VirB11 component | -9.353039 | 0.0076499 |
| UniRef50_E3H441: 2-oxoglutarate dehydrogenase, E2 component, dihydrolipoamide succinyltransferase | -9.384205 | 0.0000000 |
| UniRef50_D6S6C8 | -9.437258 | 0.0085244 |
| UniRef50_A0A024DF77 | -9.445438 | 0.0001582 |
| UniRef50_D2NQ61: Transcriptional regulator | -9.458625 | 0.0090883 |
| UniRef50_Q6A6M8: 50S ribosomal protein L23 | -9.465607 | 0.0054282 |
| UniRef50_Q01WB8: 30S ribosomal protein S11 | -9.508923 | 0.0056135 |
| UniRef50_E4STI3 | -9.550146 | 0.0001121 |
| UniRef50_Q744X6: 30S ribosomal protein S18 1 | -9.568566 | 0.0000144 |
| UniRef50_A1SNJ0: 30S ribosomal protein S11 | -9.590006 | 0.0067633 |
| UniRef50_G5EST0 | -9.608321 | 0.0000251 |
| UniRef50_Q88XW2: 30S ribosomal protein S13 | -9.621529 | 0.0061989 |
| UniRef50_M5KMU2 | -9.646129 | 0.0000184 |
| UniRef50_M1DI68 | -9.653628 | 0.0000075 |
| UniRef50_J9P2F6 | -9.655583 | 0.0000000 |
| UniRef50_F9EJV5 | -9.686258 | 0.0000166 |
| UniRef50_E3H5A0: Molybdenum-pterin-binding protein 2 | -9.693693 | 0.0000028 |
| UniRef50_D8UND8 | -9.729680 | 0.0000235 |
| UniRef50_I6XZ28 | -9.855039 | 0.0000000 |
| UniRef50_B3EUK4: 50S ribosomal protein L15 | -9.872957 | 0.0081978 |
| UniRef50_A5TSU1 | -10.024282 | 0.0000002 |
| UniRef50_T0T1M2: Nitroimidazole resistance protein, putative | -10.081178 | 0.0047272 |
| UniRef50_A5TX05 | -10.298841 | 0.0000168 |
| UniRef50_I1ZN41: IS200 family transposase | -10.329254 | 0.0072293 |
| UniRef50_S4L0C2 | -10.393216 | 0.0023871 |
| UniRef50_C4L7S9: 30S ribosomal protein S10 | -10.428054 | 0.0099966 |
| UniRef50_A0A024DHB8: Membrane protein | -10.715539 | 0.0061477 |
| UniRef50_H0Y539: SHC-transforming protein 1 (Fragment) | -10.885901 | 0.0000000 |
| UniRef50_E4DIQ9 | -10.924723 | 0.0001226 |
| UniRef50_B5E5S9 | -11.066803 | 0.0094706 |
| UniRef50_W1XIF5 | -11.316334 | 0.0089897 |
| UniRef50_A5TSU5 | -11.581322 | 0.0003144 |
| UniRef50_B5E2F8 | -12.542684 | 0.0064844 |
| UniRef50_Q8N7Y7: cDNA FLJ40209 fis, clone TESTI2020999 | -13.065945 | 0.0000000 |
| UniRef50_UPI00045D7F14 | -17.107391 | 0.0000000 |
| UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) | -17.933310 | 0.0000000 |
Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.
blbe = genes.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe = blbe[blbe['Blood_Saliva_euk_p'] < 0.01]
blbe = blbe.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe = pd.concat([blbe.head(n=50), blbe.tail(n=50)])
blbe = blbe.groupby(by=blbe.index, axis=0).mean()
blbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_Q8PJA2 | 17.525345 | 0.0000000 |
| UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) | 17.088123 | 0.0000000 |
| UniRef50_unknown | 16.964790 | 0.0025297 |
| UniRef50_Q9NRE8: PADI-H protein | 14.939340 | 0.0000000 |
| UniRef50_UPI0001D54BAB: PREDICTED: hypothetical protein LOC100423797 | 14.758022 | 0.0000000 |
| UniRef50_A4VYJ1 | 14.649395 | 0.0001501 |
| UniRef50_Q8NHA8: Olfactory receptor 1F12 | 14.057480 | 0.0000000 |
| UniRef50_P66235: 50S ribosomal protein L33 3 | 13.942107 | 0.0000002 |
| UniRef50_G3RYC5 | 13.881953 | 0.0000000 |
| UniRef50_H0YGN8: PCTP-like protein (Fragment) | 13.351819 | 0.0000000 |
| UniRef50_UPI000387C557: PREDICTED: transmembrane protein 14C isoform X1 | 13.317821 | 0.0000000 |
| UniRef50_T0V883: Peptide deformylase | 13.169019 | 0.0000004 |
| UniRef50_R6WM19 | 13.108296 | 0.0003066 |
| UniRef50_J7SHX6 | 13.103721 | 0.0000124 |
| UniRef50_F7ISS9 | 12.969938 | 0.0000000 |
| UniRef50_Q6ZPA4: CDNA FLJ26171 fis, clone ADG03656 | 12.873942 | 0.0000000 |
| UniRef50_F5XS62 | 12.824878 | 0.0000049 |
| UniRef50_UPI000387BEC9 | 12.799955 | 0.0000000 |
| UniRef50_C7T763: Conserved protein | 12.783316 | 0.0000115 |
| UniRef50_Q71RD9: PP10764 | 12.779946 | 0.0000000 |
| UniRef50_K4AUT0 | 12.644386 | 0.0000001 |
| UniRef50_H0YBG6: Stress-70 protein, mitochondrial (Fragment) | 12.622281 | 0.0000000 |
| UniRef50_Q5FHQ2: 50S ribosomal protein L34 | 12.594049 | 0.0000069 |
| UniRef50_A5TSU5 | 12.406862 | 0.0008557 |
| UniRef50_V8CH43 | 12.385584 | 0.0000001 |
| UniRef50_F9EJV5 | 12.306094 | 0.0000925 |
| UniRef50_Q6YL41 | 12.219578 | 0.0000000 |
| UniRef50_O83217: Elongation factor Tu | 12.147498 | 0.0000005 |
| UniRef50_I1YR15: PF14128 domain protein | 12.096193 | 0.0000005 |
| UniRef50_H2PQZ3 | 12.030992 | 0.0000000 |
| UniRef50_C4L7S9: 30S ribosomal protein S10 | 11.916521 | 0.0000430 |
| UniRef50_Q73JJ6: 50S ribosomal protein L7/L12 | 11.879895 | 0.0012062 |
| UniRef50_C9MRL3 | 11.879281 | 0.0000007 |
| UniRef50_D3ID43 | 11.817884 | 0.0000043 |
| UniRef50_A4SCS1: 50S ribosomal protein L5 | 11.780822 | 0.0000021 |
| UniRef50_P25461: 50S ribosomal protein L33, chloroplastic | 11.644994 | 0.0000012 |
| UniRef50_M1YD77 | 11.638134 | 0.0000649 |
| UniRef50_Q73WG1: Serine hydroxymethyltransferase | 11.608425 | 0.0000180 |
| UniRef50_Q28UY2: 50S ribosomal protein L11 | 11.607036 | 0.0000170 |
| UniRef50_Q7WK82: ATP-dependent Clp protease ATP-binding subunit ClpX | 11.574924 | 0.0000125 |
| UniRef50_B1ZNE5: 50S ribosomal protein L2 | 11.564315 | 0.0002025 |
| UniRef50_O31678: NADPH-dependent 7-cyano-7-deazaguanine reductase | 11.563929 | 0.0000492 |
| UniRef50_G0IBT1: Integral membrane protein | 11.552031 | 0.0000063 |
| UniRef50_Q5WII6: IS605/IS200 family transposase | 11.548898 | 0.0000002 |
| UniRef50_B2GAM0: Enolase | 11.542368 | 0.0000166 |
| UniRef50_Q2S3P1: 30S ribosomal protein S11 | 11.525881 | 0.0000135 |
| UniRef50_Q67JV0: 50S ribosomal protein L16 | 11.503054 | 0.0000362 |
| UniRef50_A5ZLI2 | 11.495763 | 0.0000058 |
| UniRef50_R7JXR0: Transposase IS200-like protein | 11.473894 | 0.0000000 |
| UniRef50_Q8A276: 30S ribosomal protein S20 | 11.468189 | 0.0004102 |
| UniRef50_P03886: NADH-ubiquinone oxidoreductase chain 1 | -1.528942 | 0.0070742 |
| UniRef50_Q36644: Cytochrome b (Fragment) | -1.530727 | 0.0052435 |
| UniRef50_P00395: Cytochrome c oxidase subunit 1 | -1.531205 | 0.0001640 |
| UniRef50_A8NTP2: Protein Bm983 | -1.547804 | 0.0037564 |
| UniRef50_H9M4B7: Cytochrome c oxidase subunit 1 | -1.559664 | 0.0003397 |
| UniRef50_X2D544: Cytochrome c oxidase subunit 2 (Fragment) | -1.573120 | 0.0039919 |
| UniRef50_Q35805: Cytochrome b (Fragment) | -1.581162 | 0.0059227 |
| UniRef50_D3DU34: HCG1817947, isoform CRA_a | -1.594312 | 0.0090239 |
| UniRef50_G1Q286 | -1.595514 | 0.0020015 |
| UniRef50_P50656: Cytochrome c oxidase subunit 1 (Fragment) | -1.609859 | 0.0010747 |
| UniRef50_K9LQB2: Cytochrome b (Fragment) | -1.619860 | 0.0048235 |
| UniRef50_Q580R0 | -1.631223 | 0.0002570 |
| UniRef50_G7K0E8 | -1.667354 | 0.0031348 |
| UniRef50_Q8WMI5: Protocadherin beta 3’ (Fragment) | -1.677626 | 0.0094756 |
| UniRef50_P82046: Cytochrome b (Fragment) | -1.681069 | 0.0023857 |
| UniRef50_M4NJR0: Cytochrome b (Fragment) | -1.681089 | 0.0007053 |
| UniRef50_B3UYF0: Cytochrome c oxidase subunit 1 | -1.686232 | 0.0003859 |
| UniRef50_R4PTX6: NADH-ubiquinone oxidoreductase chain 1 (Fragment) | -1.703776 | 0.0010108 |
| UniRef50_F6PXC3: Histone H3 (Fragment) | -1.724211 | 0.0047300 |
| UniRef50_H6BD92: ATP synthase FO subunit 6 (Fragment) | -1.729135 | 0.0025152 |
| UniRef50_H2PLR3 | -1.734748 | 0.0051965 |
| UniRef50_A0EXD5: Cytochrome b (Fragment) | -1.755071 | 0.0014336 |
| UniRef50_P00403: Cytochrome c oxidase subunit 2 | -1.776871 | 0.0041089 |
| UniRef50_D8LAJ2: NADH-ubiquinone oxidoreductase chain 1 (Fragment) | -1.783306 | 0.0012416 |
| UniRef50_P03905: NADH-ubiquinone oxidoreductase chain 4 | -1.846289 | 0.0016293 |
| UniRef50_Q27GW8: NADH-ubiquinone oxidoreductase chain 1 | -1.905353 | 0.0087612 |
| UniRef50_S6IEN3 | -1.923147 | 0.0071962 |
| UniRef50_E1GF92 | -1.929251 | 0.0069511 |
| UniRef50_P03915: NADH-ubiquinone oxidoreductase chain 5 | -1.932182 | 0.0009014 |
| UniRef50_F7I766: Taste receptor type 2 | -1.944672 | 0.0051698 |
| UniRef50_A1XEE7: COX1 (Fragment) | -1.954167 | 0.0006538 |
| UniRef50_P03901: NADH-ubiquinone oxidoreductase chain 4L | -2.107066 | 0.0085070 |
| UniRef50_V4LU59 | -2.203396 | 0.0038071 |
| UniRef50_P0CG47: Polyubiquitin-B | -2.252169 | 0.0072864 |
| UniRef50_UPI0003688413: hypothetical protein | -2.429962 | 0.0058653 |
| UniRef50_W1YJZ2 | -2.490203 | 0.0093057 |
| UniRef50_Q8HXG2: Macaca fascicularis brain cDNA clone: QflA-20071, similar to human ATP synthase 6 (MTATP6), mRNA, RefSeq: NM_173702.1 | -2.686430 | 0.0003784 |
| UniRef50_J9E765 | -2.692402 | 0.0036926 |
| UniRef50_I6LBK7: Cytochrome c oxidase subunit 1 (Fragment) | -2.759758 | 0.0055914 |
| UniRef50_S8BUY4 | -3.025371 | 0.0049229 |
| UniRef50_X1FQ44: Marine sediment metagenome DNA, contig: S03H2_L09072 (Fragment) | -3.040754 | 0.0008733 |
| UniRef50_W1WD51 | -3.123316 | 0.0033155 |
| UniRef50_V8PE67: Tyrosine-protein phosphatase non-receptor type 11 (Fragment) | -3.262423 | 0.0061165 |
| UniRef50_W1V5A8 | -3.600586 | 0.0020817 |
| UniRef50_W5I2W3: Cytochrome c oxidase subunit 3 | -4.202874 | 0.0065131 |
| UniRef50_I1SRR0: NADH dehydrogenase subunit 4 (Fragment) | -4.524950 | 0.0082184 |
| UniRef50_H9FCS1: Mediator of DNA damage checkpoint protein 1 (Fragment) | -4.664635 | 0.0093509 |
| UniRef50_H9FCW3: Mediator of DNA damage checkpoint protein 1 (Fragment) | -4.789376 | 0.0089735 |
| UniRef50_U5Q3P3: Cytochrome c oxidase subunit 2 (Fragment) | -5.144596 | 0.0034020 |
| UniRef50_S9WFI9 | -7.807100 | 0.0001590 |
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
blse = genes.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse = blse[blse['Blood_Buccal_euk_p'] < 0.01]
blse = blse.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse = pd.concat([blse.head(n=50), blse.tail(n=50)])
blse = blse.groupby(by=blse.index, axis=0).mean()
blse.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_Q8NHA8: Olfactory receptor 1F12 | 14.206455 | 0.0000000 |
| UniRef50_Q6ZRH3 | 13.568667 | 0.0000000 |
| UniRef50_Q6Y2K7: LP5852 protein | 11.728181 | 0.0000000 |
| UniRef50_K4AUT0 | 11.358313 | 0.0020925 |
| UniRef50_Q18CG9: 50S ribosomal protein L29 | 10.819685 | 0.0000001 |
| UniRef50_C9J0J3: Ectonucleoside triphosphate diphosphohydrolase 3 (Fragment) | 10.401278 | 0.0000000 |
| UniRef50_Q99JC0: RRNA promoter binding protein | 10.400075 | 0.0000000 |
| UniRef50_D6R9G5: Kelch-like protein 2 (Fragment) | 10.279457 | 0.0000000 |
| UniRef50_Q0TMQ9: 30S ribosomal protein S14 type Z | 10.162296 | 0.0009675 |
| UniRef50_A4VYJ1 | 10.067180 | 0.0019393 |
| UniRef50_P0A2W0: Acyl carrier protein | 10.054894 | 0.0000272 |
| UniRef50_Q836B1: UPF0473 protein EF_1204 | 9.974626 | 0.0000055 |
| UniRef50_A4W3M3: 3-hydroxyacyl-[acyl-carrier-protein] dehydratase FabZ | 9.769790 | 0.0000723 |
| UniRef50_F5XS62 | 9.763733 | 0.0008645 |
| UniRef50_UPI00029DB511: PREDICTED: E3 ubiquitin-protein ligase Itchy homolog | 9.720111 | 0.0000000 |
| UniRef50_F2QEQ4: Rhodanese-related sulfurtransferase | 9.711035 | 0.0006302 |
| UniRef50_UPI000377D7E9: hypothetical protein, partial | 9.694725 | 0.0000502 |
| UniRef50_I1ZKW3 | 9.563515 | 0.0000809 |
| UniRef50_G0ICB9 | 9.514858 | 0.0000443 |
| UniRef50_A8RZY1 | 9.453512 | 0.0070344 |
| UniRef50_Q67MT5: Peptide chain release factor 3 | 9.276501 | 0.0002504 |
| UniRef50_T1Z5N5: 50S ribosomal protein L3 | 9.236708 | 0.0003293 |
| UniRef50_Q02419: Protein PhnA | 9.125880 | 0.0000689 |
| UniRef50_Q2FFI7: Adenylosuccinate lyase | 9.089428 | 0.0002281 |
| UniRef50_A1WZJ2: 60 kDa chaperonin | 9.049558 | 0.0002462 |
| UniRef50_G0IBT1: Integral membrane protein | 9.018158 | 0.0000643 |
| UniRef50_A4QMB1: ORF58e | 8.988876 | 0.0000163 |
| UniRef50_Q5WII6: IS605/IS200 family transposase | 8.983484 | 0.0000330 |
| UniRef50_Q5FLZ3: tRNA N6-adenosine threonylcarbamoyltransferase | 8.975426 | 0.0000814 |
| UniRef50_Q97R65: Dihydroorotate dehydrogenase B (NAD(+)), catalytic subunit | 8.963234 | 0.0000471 |
| UniRef50_P67185: Probable transcriptional regulatory protein SAG1645 | 8.933849 | 0.0000210 |
| UniRef50_X1NBY2: Marine sediment metagenome DNA, contig: S06H3_S14577 | 8.925777 | 0.0000003 |
| UniRef50_P14949: Thioredoxin | 8.916874 | 0.0000713 |
| UniRef50_Q9JTA4: UPF0210 protein NMA1908 | 8.905626 | 0.0002581 |
| UniRef50_Q839Y4: Adenylosuccinate synthetase | 8.875518 | 0.0001953 |
| UniRef50_S7YHL0 | 8.867961 | 0.0000160 |
| UniRef50_A4VYP1: 30S ribosomal protein S10 | 8.846283 | 0.0000723 |
| UniRef50_Q831A8: UDP-N-acetylglucosamine 1-carboxyvinyltransferase 2 | 8.838642 | 0.0001587 |
| UniRef50_A8AVF5: Isochorismatase family protein | 8.831903 | 0.0001626 |
| UniRef50_UPI000273D5FE | 8.828918 | 0.0000000 |
| UniRef50_Q6GDQ0: ATP-dependent Clp protease ATP-binding subunit ClpL | 8.761473 | 0.0001248 |
| UniRef50_C1CQE1: UPF0340 protein SPT_0687 | 8.761093 | 0.0006395 |
| UniRef50_G7SQ06: Cell wall-associated hydrolase | 8.744909 | 0.0003129 |
| UniRef50_UPI0001D62081 | 8.733988 | 0.0000000 |
| UniRef50_P37747: UDP-galactopyranose mutase | 8.726289 | 0.0001058 |
| UniRef50_P90591: PV14 protein | 8.698111 | 0.0000001 |
| UniRef50_Q1C306: Serine/threonine transporter SstT | 8.670691 | 0.0001549 |
| UniRef50_A3CNC4: Rhodanese-like domain protein, putative | 8.650163 | 0.0000002 |
| UniRef50_A0A024DGI8: Membrane protein | 8.634631 | 0.0000937 |
| UniRef50_Q3K3S9: tRNA pseudouridine synthase A | 8.615080 | 0.0002396 |
| UniRef50_I7G8L2: Macaca fascicularis brain cDNA clone: QmoA-11833, similar to human chemokine (C-C motif) ligand 5 (CCL5), mRNA, RefSeq: NM_002985.2 | -3.057083 | 0.0044197 |
| UniRef50_UPI000273D3DB: PREDICTED: histone demethylase UTY-like | -3.058105 | 0.0006568 |
| UniRef50_X1RTY3: Marine sediment metagenome DNA, contig: S12H4_L05131 | -3.088100 | 0.0001320 |
| UniRef50_Q6ZNX4: CDNA FLJ26942 fis, clone RCT07464 | -3.088863 | 0.0000248 |
| UniRef50_Q9BZ60: FKSG63 | -3.094164 | 0.0043170 |
| UniRef50_H2PBI0 | -3.194845 | 0.0050537 |
| UniRef50_Q8NF05: FLJ00399 protein (Fragment) | -3.195422 | 0.0017753 |
| UniRef50_G3RYZ2 | -3.202026 | 0.0017820 |
| UniRef50_Q1W209: Embryonic stem cell-related gene protein | -3.298909 | 0.0040440 |
| UniRef50_UPI00045E37E4: PREDICTED: protein GVQW1-like, partial | -3.521571 | 0.0021273 |
| UniRef50_F6TJB2 | -3.599640 | 0.0073896 |
| UniRef50_W1YJZ2 | -3.669211 | 0.0000867 |
| UniRef50_F7GRN4 | -3.714752 | 0.0009200 |
| UniRef50_Q6ZUG4: cDNA FLJ43741 fis, clone TESTI2017727 | -3.918750 | 0.0026472 |
| UniRef50_Q6FG63: FLJ10385 protein | -4.086130 | 0.0029433 |
| UniRef50_W1WD51 | -4.129838 | 0.0000402 |
| UniRef50_O95662: KpnI repetitive sequence (T-betaG41) 3kb downstream of beta-globin protein (Fragment) | -4.204094 | 0.0015056 |
| UniRef50_Q8WY51: HC6 | -4.225498 | 0.0064566 |
| UniRef50_S8BUY4 | -4.259124 | 0.0000409 |
| UniRef50_F6S8D7 | -4.301492 | 0.0058294 |
| UniRef50_Q8N329: C3orf64 protein | -4.310691 | 0.0051846 |
| UniRef50_I7G4S6: Macaca fascicularis brain cDNA clone: QbsB-10410, similar to human hypothetical protein FLJ20457 (FLJ20457), mRNA, RefSeq: NM_017832.2 | -4.419854 | 0.0067237 |
| UniRef50_F7GW59 | -4.437119 | 0.0000852 |
| UniRef50_Q8N287: cDNA FLJ33669 fis, clone BRAMY2028740 | -4.463438 | 0.0061385 |
| UniRef50_W1V5A8 | -4.529869 | 0.0000501 |
| UniRef50_Q6ZR97: cDNA FLJ46534 fis, clone THYMU3037052, weakly similar to Homo sapiens HIV TAT specific factor 1 (HTATSF1) | -4.612080 | 0.0042394 |
| UniRef50_UPI000292A7A2 | -4.755766 | 0.0050788 |
| UniRef50_G2HH39: Zinc finger protein 580 | -4.874037 | 0.0066219 |
| UniRef50_F7HM41 | -5.239724 | 0.0041429 |
| UniRef50_G9L4Y9 | -5.290411 | 0.0077388 |
| UniRef50_F7ERF9 | -5.464497 | 0.0084036 |
| UniRef50_UPI0001AF4819: hypothetical protein | -5.485818 | 0.0013148 |
| UniRef50_I7G402: Macaca fascicularis brain cDNA clone: QbsA-12013, similar to human glutamate receptor, metabotropic 6 (GRM6), mRNA, RefSeq: NM_000843.2 | -5.510659 | 0.0026030 |
| UniRef50_G7PG11 | -5.567385 | 0.0067932 |
| UniRef50_Q96LU0: CDNA FLJ25069 fis, clone CBL05145 | -5.625021 | 0.0047341 |
| UniRef50_Q9UI61: PRO0470 | -5.839999 | 0.0088171 |
| UniRef50_Q9GMT0 | -5.905245 | 0.0092684 |
| UniRef50_Q6ZNZ6: CDNA FLJ26821 fis, clone PRS06629 | -5.952807 | 0.0070262 |
| UniRef50_M3Z9H5 | -6.138953 | 0.0025770 |
| UniRef50_F7CR38 | -6.162863 | 0.0077810 |
| UniRef50_X6R7Y7: Intraflagellar transport protein 25 homolog | -6.496334 | 0.0059577 |
| UniRef50_K7EJ15: Ankyrin repeat domain-containing protein 20A3 (Fragment) | -6.986405 | 0.0002879 |
| UniRef50_F6ZQ59 | -7.076242 | 0.0070378 |
| UniRef50_Q4XHH2 | -7.100608 | 0.0066861 |
| UniRef50_Q8N210: Retbindin | -7.152420 | 0.0042904 |
| UniRef50_H7BYS4 | -7.256297 | 0.0030867 |
| UniRef50_Q4Y9P0 | -7.638074 | 0.0045271 |
| UniRef50_Q8WTZ3: Zinc finger protein ENSP00000375192 | -8.305599 | 0.0091100 |
| UniRef50_S9WFI9 | -9.303851 | 0.0000001 |
| UniRef50_S4L0C2 | -9.774147 | 0.0042497 |
genes = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/genes.csv', header=0, index_col=0)
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
bs3 = genes.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs3 = bs3[bs3['Saliva_Blood_p'] < 0.01]
bs3 = bs3.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs3 = pd.concat([bs3.head(n=50), bs3.tail(n=50)])
bs3 = bs3.groupby(by=bs3.index, axis=0).mean()
bs3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef90_A0A059UGD1 | 7.822147 | 0.0014155 |
| UniRef50_Q5UGI3 | 5.981764 | 0.0092449 |
| UniRef50_A7SYF9 | 4.726831 | 0.0007634 |
| UniRef50_UPI000D0C899F | 4.688802 | 0.0069116 |
| UniRef50_A0A1A9AHF0 | 4.229893 | 0.0036662 |
| UniRef50_A0A0D9S5U8 | 4.186541 | 0.0000897 |
| UniRef90_A0A0B7AG49 | 4.082921 | 0.0011859 |
| UniRef90_A0A1A9AHF0 | 3.974740 | 0.0022994 |
| UniRef90_UPI0005298A3D | 3.961044 | 0.0032579 |
| UniRef50_R4GNJ2: 60S ribosomal protein L5 | 3.947971 | 0.0013257 |
| UniRef90_UPI000CEF8CC7 | 3.860900 | 0.0082239 |
| UniRef90_UPI000D0A1051 | 3.849842 | 0.0014695 |
| UniRef50_F6QPS0: Cell division cycle protein 27 homolog | 3.832752 | 0.0082278 |
| UniRef90_F6TZJ6 | 3.817980 | 0.0016388 |
| UniRef90_A0A2V2KWK9 | 3.769290 | 0.0000094 |
| UniRef50_P62975: Ubiquitin | 3.740375 | 0.0039747 |
| UniRef50_A0A084WT89 | 3.740220 | 0.0039688 |
| UniRef50_A0A0A6YW67 | 3.740220 | 0.0039688 |
| UniRef50_UPI000D671FA2 | 3.736353 | 0.0039639 |
| UniRef90_F7DPM2 | 3.714813 | 0.0000831 |
| UniRef90_A0A2U2VZN6 | 3.708737 | 0.0046169 |
| UniRef50_UPI0008F51C75 | 3.703223 | 0.0013197 |
| UniRef90_H2NHX1 | 3.685370 | 0.0003670 |
| UniRef50_A0A2U2VZN6 | 3.682427 | 0.0048597 |
| UniRef90_F7DUA9 | 3.649735 | 0.0002683 |
| UniRef50_A0A2V2KWK9 | 3.614798 | 0.0000136 |
| UniRef90_F7A9Z7 | 3.604416 | 0.0002045 |
| UniRef90_A0A2I3T9X4 | 3.586077 | 0.0017932 |
| UniRef90_A0A2U2VXH9 | 3.584093 | 0.0007692 |
| UniRef50_F7DUA9 | 3.571522 | 0.0004647 |
| UniRef50_A0A023AVK8: Putative endonuclease/reverse transcriptase | 3.481009 | 0.0016895 |
| UniRef90_UPI000D09E8B4 | 3.460189 | 0.0022208 |
| UniRef90_A0A2U2VWN7 | 3.455927 | 0.0076869 |
| UniRef50_G3SIN0 | 3.444951 | 0.0014334 |
| UniRef90_K0Z2P5 | 3.426548 | 0.0026335 |
| UniRef90_A0A2D4PUW7 | 3.422458 | 0.0003262 |
| UniRef90_A0A2R8MFL8 | 3.415982 | 0.0001580 |
| UniRef90_A0A2R8Z5V9 | 3.414382 | 0.0022742 |
| UniRef90_A0A2I3SFR4 | 3.404313 | 0.0079436 |
| UniRef90_F2VPV5 | 3.383545 | 0.0018872 |
| UniRef90_G3S285 | 3.381721 | 0.0013277 |
| UniRef50_A0A2U2VXH9 | 3.357922 | 0.0004359 |
| UniRef90_S9X3V4 | 3.350696 | 0.0036189 |
| UniRef90_H2PTC1 | 3.348218 | 0.0001353 |
| UniRef50_A0A2U2VWN7 | 3.342515 | 0.0082517 |
| UniRef90_G7NGG9 | 3.340707 | 0.0001180 |
| UniRef50_UPI000B7B8E00 | 3.332866 | 0.0020228 |
| UniRef90_Q99878 | 3.328294 | 0.0034219 |
| UniRef50_UPI0008F4FD63 | 3.323762 | 0.0000537 |
| UniRef90_Q4G1Z1 | 3.318735 | 0.0023210 |
| UniRef90_A0A134BX12 | -13.225572 | 0.0000000 |
| UniRef90_C9MQ90 | -13.229306 | 0.0000000 |
| UniRef90_D3I8L8 | -13.245145 | 0.0000000 |
| UniRef90_I0GNP0 | -13.259245 | 0.0000008 |
| UniRef90_A7B8Q4 | -13.271813 | 0.0000000 |
| UniRef90_G0ICA3 | -13.295380 | 0.0000002 |
| UniRef90_A0A2D3LM49 | -13.369449 | 0.0000000 |
| UniRef90_F8DKM2 | -13.372066 | 0.0000002 |
| UniRef90_A0A2C6B6C9 | -13.374933 | 0.0012569 |
| UniRef90_C9N1P8 | -13.387055 | 0.0000002 |
| UniRef90_G1WBW8 | -13.387095 | 0.0000000 |
| UniRef90_D9RRC6 | -13.421057 | 0.0000002 |
| UniRef90_G5GL71 | -13.433467 | 0.0002349 |
| UniRef50_A0A2J8WD87 | -13.497056 | 0.0000000 |
| UniRef90_X5NU12 | -13.504789 | 0.0000000 |
| UniRef90_A7B8Q3 | -13.583736 | 0.0000000 |
| UniRef90_U2J5N0 | -13.604911 | 0.0000000 |
| UniRef90_A0A139P8V9 | -13.685165 | 0.0000000 |
| UniRef90_A7B8Q2 | -13.692794 | 0.0000000 |
| UniRef90_K7ESR6 | -13.698188 | 0.0000000 |
| UniRef90_A0A134C1V4 | -13.706270 | 0.0000000 |
| UniRef90_G3RWI0 | -13.715008 | 0.0000000 |
| UniRef90_Q8CWN7 | -13.731959 | 0.0000000 |
| UniRef90_E4LCZ2 | -13.743743 | 0.0000000 |
| UniRef90_D4TVU3 | -13.746518 | 0.0000003 |
| UniRef90_A0A0K2J3Z9 | -13.803028 | 0.0000041 |
| UniRef90_K0ZJH0 | -13.821083 | 0.0000000 |
| UniRef90_D3IDX3 | -13.840488 | 0.0001256 |
| UniRef90_J7TNS4 | -13.858979 | 0.0000000 |
| UniRef90_V8BHM6 | -13.946408 | 0.0000000 |
| UniRef50_Q9HC73-2: Isoform 2 of Cytokine receptor-like factor 2 | -14.139921 | 0.0000000 |
| UniRef90_A0A2D3LLY2 | -14.383255 | 0.0000009 |
| UniRef90_Q6TDT1 | -14.420960 | 0.0000000 |
| UniRef50_Q6TDT1: Protein transactivated by hepatitis B virus E antigen | -14.482286 | 0.0000000 |
| UniRef90_A0A2I2YPF9 | -14.513736 | 0.0000000 |
| UniRef90_Q8RHH9 | -14.516243 | 0.0011083 |
| UniRef90_A0A134A205 | -14.624728 | 0.0000001 |
| UniRef90_U2Z4S8 | -14.773062 | 0.0000000 |
| UniRef90_X6PVX8 | -14.810247 | 0.0000000 |
| UniRef90_R5FPP1 | -14.840933 | 0.0000000 |
| UniRef90_J7TUV6 | -14.890002 | 0.0000000 |
| UniRef50_G3RWI0 | -14.990248 | 0.0000000 |
| UniRef90_A0A2I1TTC0 | -15.008420 | 0.0000000 |
| UniRef90_U2ZPB1 | -15.237430 | 0.0000000 |
| UniRef90_C4V1U8 | -15.399598 | 0.0000001 |
| UniRef90_J7SHX6 | -15.431610 | 0.0000000 |
| UniRef90_C9MYW9 | -15.964145 | 0.0000000 |
| UniRef90_A0A134C1F2 | -15.999313 | 0.0000002 |
| UniRef90_Q5LXM5 | -16.079023 | 0.0000000 |
| UniRef90_V8M0W9 | -17.605024 | 0.0000000 |
Negative fold changes are higher in buccal samples while positive are higher in blood samples.
bb3 = genes.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb3 = bb3[bb3['Buccal_Blood_p'] < 0.01]
bb3 = bb3.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb3 = pd.concat([bb3.head(n=50), bb3.tail(n=50)])
bb3 = bb3.groupby(by=bb3.index, axis=0).mean()
bb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_UPI000D0C899F | 10.129773 | 0.0019113 |
| UniRef90_A0A059UGD1 | 7.436453 | 0.0067404 |
| UniRef90_B0X777 | 2.575430 | 0.0052864 |
| UniRef90_A0A2M4CW35 | 2.444204 | 0.0044833 |
| UniRef90_R4G555 | 2.444032 | 0.0044835 |
| UniRef90_D2HWI9 | 2.443863 | 0.0044837 |
| UniRef90_P84040 | 2.443863 | 0.0044837 |
| UniRef90_J9NRV0 | 2.443863 | 0.0044837 |
| UniRef90_P35059 | 2.326871 | 0.0048374 |
| UniRef90_A0A096MDK0 | 2.313180 | 0.0048180 |
| UniRef90_H0ZGB0 | 2.312709 | 0.0048194 |
| UniRef90_U3I341 | 2.312378 | 0.0048204 |
| UniRef90_W5QFQ2 | 2.312378 | 0.0048204 |
| UniRef90_W5PHH3 | 2.312378 | 0.0048204 |
| UniRef90_G3U9V5 | 2.312205 | 0.0048210 |
| UniRef90_X1WGM1 | 2.312028 | 0.0048215 |
| UniRef90_UPI0003C8CAF6 | 2.312028 | 0.0048215 |
| UniRef90_G3U1Z3 | 2.312028 | 0.0048215 |
| UniRef90_G3U8T3 | 2.312028 | 0.0048215 |
| UniRef90_G3UKU6 | 2.312028 | 0.0048215 |
| UniRef90_UPI00073339EF | 2.312028 | 0.0048215 |
| UniRef90_P62805 | 2.311852 | 0.0048221 |
| UniRef50_F6X6A6 | 2.027367 | 0.0022089 |
| UniRef90_P61247 | 2.013804 | 0.0008346 |
| UniRef90_UPI00075FE3C9 | 1.992473 | 0.0093257 |
| UniRef90_A0A2U2VXH9 | 1.983799 | 0.0022542 |
| UniRef90_H2NHX1 | 1.954676 | 0.0002081 |
| UniRef90_X1V349 | 1.870192 | 0.0081726 |
| UniRef90_D6RAT0 | 1.862069 | 0.0014769 |
| UniRef90_A0A2R8MFL8 | 1.811106 | 0.0001598 |
| UniRef50_A0A2U2VXH9 | 1.802437 | 0.0017445 |
| UniRef90_A0A1U7RBC3 | 1.801536 | 0.0013585 |
| UniRef90_Q4GWZ2 | 1.791779 | 0.0056083 |
| UniRef90_Q3TXR5 | 1.791113 | 0.0050731 |
| UniRef90_P14206 | 1.778647 | 0.0033932 |
| UniRef90_G1N211 | 1.774490 | 0.0024735 |
| UniRef90_A0A091FAX3 | 1.774434 | 0.0024739 |
| UniRef90_UPI000C313812 | 1.765998 | 0.0001919 |
| UniRef50_UPI0008F51C75 | 1.712422 | 0.0004572 |
| UniRef90_A0A2I0MJF5 | 1.696810 | 0.0001676 |
| UniRef90_A0A2I3T9X4 | 1.690091 | 0.0000037 |
| UniRef50_A0A286ZVP5 | 1.687382 | 0.0007155 |
| UniRef50_G3SIN0 | 1.677557 | 0.0005188 |
| UniRef50_A0A2S2P7V1 | 1.672817 | 0.0043278 |
| UniRef90_H2PTC1 | 1.666913 | 0.0011545 |
| UniRef90_UPI000D0A1051 | 1.654833 | 0.0004163 |
| UniRef90_UPI000D09E8B4 | 1.625457 | 0.0029957 |
| UniRef90_Q7TP33 | 1.599129 | 0.0013668 |
| UniRef50_A0A2V2KWK9 | 1.595489 | 0.0005246 |
| UniRef90_A0A1D5RDK4 | 1.588584 | 0.0050427 |
| UniRef90_Q9F0R4 | -13.037667 | 0.0000163 |
| UniRef90_A0A0T8AUD9 | -13.047165 | 0.0000107 |
| UniRef90_A0A0E2UGZ4 | -13.058239 | 0.0000194 |
| UniRef90_A3CK83 | -13.063269 | 0.0001287 |
| UniRef90_P66717 | -13.069189 | 0.0000311 |
| UniRef90_E6KK04 | -13.081844 | 0.0000318 |
| UniRef90_B5E3W1 | -13.082211 | 0.0000138 |
| UniRef90_Q04J38 | -13.084233 | 0.0001112 |
| UniRef90_Q04JK8 | -13.088419 | 0.0000003 |
| UniRef90_C0MBU0 | -13.107533 | 0.0000337 |
| UniRef90_A0A139RMK4 | -13.115100 | 0.0000295 |
| UniRef90_Q97TC4 | -13.123406 | 0.0000813 |
| UniRef90_Q04JE2 | -13.123461 | 0.0000036 |
| UniRef90_P59186 | -13.139954 | 0.0000272 |
| UniRef90_Q04HZ9 | -13.140601 | 0.0000210 |
| UniRef90_I1ZPB5 | -13.152555 | 0.0000212 |
| UniRef90_Q8DS25 | -13.169243 | 0.0000566 |
| UniRef90_A0A139QR72 | -13.178519 | 0.0000497 |
| UniRef90_P66648 | -13.188761 | 0.0000276 |
| UniRef90_A0A0D0ZIG5 | -13.223656 | 0.0000044 |
| UniRef90_A8AZM0 | -13.240020 | 0.0000192 |
| UniRef90_A4VYP5 | -13.244254 | 0.0000051 |
| UniRef90_A0A139PM50 | -13.248708 | 0.0000644 |
| UniRef50_A0A0D0ZIG5 | -13.254257 | 0.0000045 |
| UniRef90_A0A2Z5TND2 | -13.255398 | 0.0000087 |
| UniRef90_S7XR44 | -13.274436 | 0.0000017 |
| UniRef90_P66394 | -13.288386 | 0.0000455 |
| UniRef90_Q8DS23 | -13.297321 | 0.0000254 |
| UniRef90_A0A139QR65 | -13.299838 | 0.0000006 |
| UniRef90_A3CRA2 | -13.339479 | 0.0000476 |
| UniRef90_Q8DS22 | -13.355691 | 0.0000032 |
| UniRef90_Q8CZ89 | -13.374704 | 0.0000373 |
| UniRef90_Q04IS6 | -13.503939 | 0.0000237 |
| UniRef90_A8AZL7 | -13.509927 | 0.0000326 |
| UniRef90_P48853 | -13.531313 | 0.0000019 |
| UniRef90_P66634 | -13.531343 | 0.0000420 |
| UniRef90_A8AZD5 | -13.544545 | 0.0000219 |
| UniRef90_P66137 | -13.580103 | 0.0000133 |
| UniRef90_B4U516 | -13.648142 | 0.0000071 |
| UniRef90_U5P1K2 | -13.716305 | 0.0000127 |
| UniRef90_V8IAL5 | -13.720273 | 0.0000123 |
| UniRef90_Q97T34 | -13.814773 | 0.0000025 |
| UniRef90_H2P484 | -13.911094 | 0.0000000 |
| UniRef90_A5MC70 | -14.398737 | 0.0000012 |
| UniRef90_Q04ML8 | -14.538773 | 0.0000068 |
| UniRef90_E6KLV2 | -14.639250 | 0.0000185 |
| UniRef90_K0ZJH0 | -14.689870 | 0.0000000 |
| UniRef90_Q3K3Q0 | -14.715563 | 0.0000019 |
| UniRef90_A0A2I1YXE0 | -15.202859 | 0.0001122 |
| UniRef90_U2ZPB1 | -17.049232 | 0.0000124 |
Negative fold changes are higher in saliva samples while positive are higher in buccal samples.
sb3 = genes.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb3 = sb3[sb3['Saliva_Buccal_p'] < 0.01]
sb3 = sb3.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb3 = pd.concat([sb3.head(n=50), sb3.tail(n=50)])
sb3 = sb3.groupby(by=sb3.index, axis=0).mean()
sb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_A0A081Q6T6 | 8.862121 | 0.0051140 |
| UniRef90_A0A139RCU2 | 8.693669 | 0.0049375 |
| UniRef50_W1YUQ6: Iron-sulfur cluster binding protein (Fragment) | 8.582003 | 0.0003729 |
| UniRef90_E4LB29 | 8.323290 | 0.0000000 |
| UniRef50_F9PVE3: Gram-positive signal peptide protein, YSIRK family | 8.231943 | 0.0047906 |
| UniRef90_A8AXT8 | 8.212688 | 0.0057492 |
| UniRef90_A0A1F1IWP7 | 8.112856 | 0.0046921 |
| UniRef90_W1WE06 | 8.031932 | 0.0007598 |
| UniRef90_A0A1X1I534 | 7.837456 | 0.0097395 |
| UniRef50_W1XVI8: NLPA lipoprotein (Fragment) | 7.799328 | 0.0001882 |
| UniRef90_A0A0T8ACY0 | 7.686068 | 0.0000941 |
| UniRef90_W1V072 | 7.482241 | 0.0006265 |
| UniRef90_A0A081R2B4 | 7.325415 | 0.0082141 |
| UniRef90_A0A075B7C8 | 7.179019 | 0.0007298 |
| UniRef90_Q708Q5 | 7.141482 | 0.0047735 |
| UniRef50_W1Y3H1: Conserved carboxylase region (Fragment) | 7.116781 | 0.0000299 |
| UniRef50_E7N911 | 7.110605 | 0.0004794 |
| UniRef90_E7N911 | 7.110605 | 0.0000077 |
| UniRef50_A0A0T9FSR6 | 7.100827 | 0.0028679 |
| UniRef50_E3GZN7 | 6.996615 | 0.0008823 |
| UniRef90_E3GZN7 | 6.996615 | 0.0008823 |
| UniRef90_D6KK22 | 6.944447 | 0.0013294 |
| UniRef90_W1YLT1 | 6.934864 | 0.0000365 |
| UniRef90_A0A139PTT3 | 6.859800 | 0.0023204 |
| UniRef50_A0A0U0K5K2 | 6.751190 | 0.0004157 |
| UniRef90_F0IKB1 | 6.732742 | 0.0024579 |
| UniRef50_A0A139PIN8 | 6.620595 | 0.0012891 |
| UniRef50_E3H3Y7 | 6.497254 | 0.0016452 |
| UniRef90_E3H3Y7 | 6.497254 | 0.0015394 |
| UniRef90_U7V1B2 | 6.481539 | 0.0084317 |
| UniRef90_Q4QN10 | 6.475568 | 0.0014609 |
| UniRef90_F7GBC1 | 6.350249 | 0.0014280 |
| UniRef50_A0A0T9GUE3 | 6.341451 | 0.0020606 |
| UniRef50_A0A0U0L3I1 | 6.252453 | 0.0001216 |
| UniRef50_A0A0U0LBU0 | 6.241439 | 0.0011098 |
| UniRef50_O54422 | 6.236804 | 0.0017761 |
| UniRef50_UPI000175411A | 6.232737 | 0.0000700 |
| UniRef90_A0A0T8WBY0 | 6.192122 | 0.0005151 |
| UniRef50_Q708Q5 | 6.178868 | 0.0052554 |
| UniRef90_A0A150NGX6 | 6.032115 | 0.0019991 |
| UniRef50_F7GBC1 | 5.976026 | 0.0020113 |
| UniRef90_A0A0Y1ZBZ4 | 5.931165 | 0.0000145 |
| UniRef90_A0A0Y2AQK8 | 5.915247 | 0.0009299 |
| UniRef90_A5M8D7 | 5.914862 | 0.0003115 |
| UniRef50_A5M8D7: Peptidase, M20/M25/M40 family protein | 5.914862 | 0.0003115 |
| UniRef50_A0A139Q0B7 | 5.907665 | 0.0048237 |
| UniRef90_F5VX89 | 5.891731 | 0.0044566 |
| UniRef50_F9Q3H8: Competence-induced protein Ccs4 family protein | 5.771735 | 0.0082617 |
| UniRef90_A0A0F2CH56 | 5.766957 | 0.0069526 |
| UniRef90_A0A0T8AE83 | 5.763812 | 0.0023496 |
| UniRef90_A6LEG9 | -12.524120 | 0.0012097 |
| UniRef90_F0F5Q9 | -12.555641 | 0.0000000 |
| UniRef90_A0A2D3LNT5 | -12.562524 | 0.0000001 |
| UniRef90_U2KD45 | -12.566643 | 0.0000000 |
| UniRef90_U2K4J4 | -12.602537 | 0.0003876 |
| UniRef90_L9PWI7 | -12.628225 | 0.0007691 |
| UniRef90_F9M3J5 | -12.655427 | 0.0000767 |
| UniRef90_U2K4R3 | -12.670375 | 0.0000000 |
| UniRef90_F0H8Q7 | -12.678580 | 0.0000001 |
| UniRef90_C4V316 | -12.706970 | 0.0000395 |
| UniRef90_A0A096C1X8 | -12.727731 | 0.0013362 |
| UniRef90_F9DLF8 | -12.741067 | 0.0000000 |
| UniRef90_A0A1B1I5Z7 | -12.743715 | 0.0009261 |
| UniRef90_D5EYY8 | -12.777063 | 0.0011725 |
| UniRef90_X4QQ61 | -12.797472 | 0.0000010 |
| UniRef90_C9N1P9 | -12.798616 | 0.0000003 |
| UniRef90_A0A0A2DYJ5 | -12.800275 | 0.0010219 |
| UniRef50_Q5T5H1: Alpha-endosulfine | -12.804101 | 0.0000000 |
| UniRef90_D9RWQ6 | -12.828632 | 0.0018162 |
| UniRef90_U2INL1 | -12.833208 | 0.0000536 |
| UniRef90_A0A2K9HB98 | -12.843978 | 0.0001843 |
| UniRef90_A0A1B1I6C5 | -12.858335 | 0.0000000 |
| UniRef90_G1VHD9 | -12.937039 | 0.0000003 |
| UniRef90_G1VAR6 | -12.960851 | 0.0000002 |
| UniRef90_A0A0K1NIV2 | -12.972303 | 0.0000000 |
| UniRef90_U2JAK3 | -13.035604 | 0.0000026 |
| UniRef90_A0A2G9ICB3 | -13.072553 | 0.0004154 |
| UniRef90_D4S901 | -13.113845 | 0.0000006 |
| UniRef90_U2KIJ5 | -13.127126 | 0.0000000 |
| UniRef90_A0A2D3L930 | -13.129930 | 0.0000001 |
| UniRef90_D4CUG6 | -13.169832 | 0.0068414 |
| UniRef90_A0A134A7E7 | -13.209713 | 0.0000134 |
| UniRef90_A0A2D3L5P2 | -13.224481 | 0.0017631 |
| UniRef90_C9MQ90 | -13.229306 | 0.0000000 |
| UniRef90_I0GNP0 | -13.259245 | 0.0000008 |
| UniRef90_A0A2D3LM49 | -13.369449 | 0.0000000 |
| UniRef90_A0A2C6B6C9 | -13.374933 | 0.0073775 |
| UniRef90_G1WBW8 | -13.387095 | 0.0000000 |
| UniRef50_A0A2J8WD87 | -13.497056 | 0.0000000 |
| UniRef90_U2J5N0 | -13.604911 | 0.0000000 |
| UniRef90_K7ESR6 | -13.698188 | 0.0000000 |
| UniRef90_A0A134C1V4 | -13.706270 | 0.0000000 |
| UniRef90_A0A0K2J3Z9 | -13.803028 | 0.0000041 |
| UniRef90_A0A2D3LLY2 | -14.383255 | 0.0000009 |
| UniRef90_A0A2I2YPF9 | -14.513736 | 0.0000000 |
| UniRef90_Q8RHH9 | -14.516243 | 0.0011083 |
| UniRef90_A0A134A205 | -14.624728 | 0.0000001 |
| UniRef90_X6PVX8 | -14.810247 | 0.0008018 |
| UniRef90_R5FPP1 | -14.840933 | 0.0000000 |
| UniRef90_C4V1U8 | -15.399598 | 0.0000001 |
Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.
ss3 = genes.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss3 = ss3[ss3['Saliva_Saliva_euk_p'] < 0.01]
ss3 = ss3.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss3 = pd.concat([ss3.head(n=50), ss3.tail(n=50)])
ss3 = ss3.groupby(by=ss3.index, axis=0).mean()
ss3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef90_A0A060Z7W3 | 8.282348 | 0.0003327 |
| UniRef90_A0A378VVT4 | 7.664048 | 0.0099090 |
| UniRef90_A0A378VVN6 | 7.584239 | 0.0027202 |
| UniRef50_J3QRX4: Coordinator of PRMT5 and differentiation stimulator | 6.821069 | 0.0063040 |
| UniRef90_UPI000011108B | 6.543879 | 0.0009548 |
| UniRef50_UPI000011108B | 6.345446 | 0.0010123 |
| UniRef50_A0A127SR39 | 6.073086 | 0.0000011 |
| UniRef90_G8FL39 | 6.001454 | 0.0017905 |
| UniRef50_G8FL39: Mitochondrial glutaryl-Coenzyme A dehydrogenase (Fragment) | 6.001454 | 0.0017905 |
| UniRef90_UPI0004F08662 | 5.864100 | 0.0007430 |
| UniRef90_F7EL88 | 5.482098 | 0.0003288 |
| UniRef90_UPI00092A8639 | 5.435772 | 0.0000039 |
| UniRef90_Q2PJC1 | 5.248742 | 0.0070655 |
| UniRef90_A0A182PRM8 | 5.098600 | 0.0072978 |
| UniRef50_A0A087X2D0 | 3.751685 | 0.0003786 |
| UniRef50_A0A179D271 | 2.986443 | 0.0017260 |
| UniRef50_P02301 | 2.732590 | 0.0021296 |
| UniRef90_H2PKK1 | 2.577370 | 0.0014792 |
| UniRef90_A0A0Q0XIY8 | 2.531513 | 0.0074443 |
| UniRef90_A0A2D2APV3 | 2.488688 | 0.0081355 |
| UniRef90_R0KIL0 | 2.479330 | 0.0048823 |
| UniRef50_P84243 | 2.387499 | 0.0036276 |
| UniRef50_Q9U281 | 2.325302 | 0.0070651 |
| UniRef90_A0A2J8RFQ3 | 2.228360 | 0.0020223 |
| UniRef90_G7NWY1 | 2.219460 | 0.0061305 |
| UniRef90_P68036-3 | 2.208144 | 0.0070047 |
| UniRef90_G7N124 | 2.201553 | 0.0043911 |
| UniRef90_F7DGC8 | 2.193363 | 0.0026905 |
| UniRef90_I7GP34 | 2.184647 | 0.0032542 |
| UniRef50_F6XQ29: Histone H3 | 2.177236 | 0.0031249 |
| UniRef50_A0A182JX25 | 2.170923 | 0.0026493 |
| UniRef50_A0A2J8IJJ7 | 2.150553 | 0.0098009 |
| UniRef90_V9H061 | 2.051102 | 0.0078702 |
| UniRef90_A0A2U3XFV7 | 2.039427 | 0.0050353 |
| UniRef90_A0A2R8Z8D0 | 2.028121 | 0.0004405 |
| UniRef90_F7BWE8 | 1.992781 | 0.0072666 |
| UniRef50_X1QTZ0: Marine sediment metagenome DNA, contig: S12H4_C02507 | 1.987414 | 0.0079446 |
| UniRef50_A0A2U3XFV7 | 1.968830 | 0.0038186 |
| UniRef50_V9H061: Unnamed HERV-H protein (Fragment) | 1.947316 | 0.0060155 |
| UniRef50_G5E9R0: Actin, cytoplasmic 1 | 1.910023 | 0.0060873 |
| UniRef50_I7GP34: Macaca fascicularis brain cDNA clone: QflA-23777, similar to human hypothetical protein MGC33637 (MGC33637), mRNA, RefSeq: NM_152596.2 | 1.906366 | 0.0036016 |
| UniRef50_A0A1D5PY49 | 1.887344 | 0.0075014 |
| UniRef50_B8A6G2 | 1.886034 | 0.0065951 |
| UniRef90_A0A2R8M9T5 | 1.882532 | 0.0045321 |
| UniRef50_S9XUN5 | 1.876732 | 0.0060354 |
| UniRef50_J3KT65 | 1.858143 | 0.0068394 |
| UniRef90_F7CBQ1 | 1.840675 | 0.0020881 |
| UniRef50_A0A0D3RVU0 | 1.833649 | 0.0023064 |
| UniRef90_A0A2K5CZG7 | 1.827527 | 0.0061136 |
| UniRef90_UPI0000E260C0 | 1.813880 | 0.0043698 |
| UniRef90_UPI000D0C7B1B | -9.849761 | 0.0000000 |
| UniRef90_S7X8Y5 | -9.864995 | 0.0041921 |
| UniRef90_D5RFR1 | -9.891940 | 0.0001036 |
| UniRef90_Q7P3Y9 | -9.904060 | 0.0003434 |
| UniRef90_D5RFA4 | -9.932169 | 0.0000540 |
| UniRef90_X8HAW9 | -9.945829 | 0.0077347 |
| UniRef90_C9MMH6 | -9.983416 | 0.0094733 |
| UniRef90_A0A099BXI6 | -9.985410 | 0.0090131 |
| UniRef90_Q5F686 | -9.993084 | 0.0081696 |
| UniRef90_U7UXP7 | -10.003804 | 0.0001958 |
| UniRef90_F8HCU5 | -10.017341 | 0.0007747 |
| UniRef90_I2NQ87 | -10.050622 | 0.0067671 |
| UniRef90_I2NMZ8 | -10.079234 | 0.0001723 |
| UniRef90_Q8RHA6 | -10.094057 | 0.0035789 |
| UniRef90_I1ZKR1 | -10.115973 | 0.0000633 |
| UniRef90_J2Z9J7 | -10.132112 | 0.0061563 |
| UniRef90_F9PEW5 | -10.133380 | 0.0000032 |
| UniRef90_A0A3E4U7T2 | -10.167806 | 0.0000175 |
| UniRef50_G7N3R4 | -10.201602 | 0.0000011 |
| UniRef90_C8WAE2 | -10.218783 | 0.0087937 |
| UniRef90_E3CBB6 | -10.259544 | 0.0001214 |
| UniRef50_UPI000D0C7B1B | -10.259927 | 0.0000000 |
| UniRef90_E6KQV5 | -10.261484 | 0.0086967 |
| UniRef90_U1SCL3 | -10.268713 | 0.0081649 |
| UniRef90_F0FAA7 | -10.336859 | 0.0091603 |
| UniRef90_E8K809 | -10.489002 | 0.0019604 |
| UniRef90_E8K637 | -10.493244 | 0.0000178 |
| UniRef90_Q8RF81 | -10.523584 | 0.0017007 |
| UniRef90_W1VI75 | -10.547989 | 0.0000033 |
| UniRef90_E3CG68 | -10.551141 | 0.0000978 |
| UniRef90_C9N200 | -10.581499 | 0.0064119 |
| UniRef90_V8BDC1 | -10.633253 | 0.0000030 |
| UniRef90_J5HE19 | -10.650225 | 0.0025169 |
| UniRef90_Q8RIG0 | -10.670150 | 0.0002202 |
| UniRef90_UPI0002ADB0C8 | -10.813543 | 0.0000000 |
| UniRef90_U7UYD4 | -10.822357 | 0.0002397 |
| UniRef90_A0A2X0U125 | -10.923687 | 0.0000005 |
| UniRef90_F2QBW8 | -11.283063 | 0.0000006 |
| UniRef90_F2CGD2 | -11.460475 | 0.0014440 |
| UniRef90_I0Q7R4 | -11.559095 | 0.0000005 |
| UniRef90_A0A2R8MV81 | -11.712899 | 0.0000000 |
| UniRef90_Q8RDX9 | -11.818368 | 0.0003052 |
| UniRef90_E8K3H6 | -11.899789 | 0.0000204 |
| UniRef90_E6KT68 | -11.996792 | 0.0001027 |
| UniRef90_A0A2K5XWR8 | -12.061613 | 0.0000000 |
| UniRef90_D4S901 | -13.113845 | 0.0062068 |
| UniRef90_Q97R59 | -13.172387 | 0.0000854 |
| UniRef90_G0ICA3 | -13.295380 | 0.0003260 |
| UniRef90_Q8CWN7 | -13.731959 | 0.0000000 |
| UniRef90_K0ZJH0 | -13.821083 | 0.0000000 |
Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.
bbe3 = genes.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe3 = bbe3[bbe3['Buccal_Buccal_euk_p'] < 0.01]
bbe3 = bbe3.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe3 = pd.concat([bbe3.head(n=50), bbe3.tail(n=50)])
bbe3 = bbe3.groupby(by=bbe3.index, axis=0).mean()
bbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef50_K7EML0: Pancreatic hormone | 7.0310054 | 0.0000004 |
| UniRef90_M0R117 | 5.5011850 | 0.0059983 |
| UniRef90_A0A2P5EZ99 | 3.9890821 | 0.0062551 |
| UniRef90_A0A200R7A7 | 3.2039160 | 0.0022188 |
| UniRef90_A0A2H5PBU6 | 2.8736481 | 0.0077219 |
| UniRef90_G3HPV7 | 2.0364489 | 0.0066518 |
| UniRef90_A0A1D5QXG0 | 1.6614831 | 0.0089477 |
| UniRef90_UPI0000481B5D | 1.3253055 | 0.0075491 |
| UniRef90_UPI000689AF8D | -0.7216742 | 0.0092929 |
| UniRef90_S5G7D6 | -0.8770696 | 0.0020936 |
| UniRef90_A0A1X3K4L0 | -0.9239208 | 0.0090500 |
| UniRef50_S5G7D6: Reverse transcriptase (Fragment) | -0.9485544 | 0.0024463 |
| UniRef90_A0A2I3HL55 | -1.1146873 | 0.0095099 |
| UniRef90_U3J7Y9 | -1.2395352 | 0.0088281 |
| UniRef90_UPI00071A5F49 | -1.2396464 | 0.0079720 |
| UniRef90_A0A0W0F566 | -1.3267132 | 0.0017491 |
| UniRef90_A0A226P011 | -1.3338100 | 0.0022315 |
| UniRef90_A0A1A6FWH6 | -1.3420823 | 0.0017333 |
| UniRef90_A0A3A9SZX0 | -1.4347887 | 0.0041514 |
| UniRef90_A0A0D9RN11 | -1.4398683 | 0.0084938 |
| UniRef90_A0A2D9B6U5 | -1.4889206 | 0.0004896 |
| UniRef90_A0A164U839 | -1.5027909 | 0.0037211 |
| UniRef90_UPI00077D3835 | -1.5080786 | 0.0004962 |
| UniRef90_UPI000904C9AB | -1.5308643 | 0.0027494 |
| UniRef90_L5JW24 | -1.5586683 | 0.0003841 |
| UniRef90_UPI0008FA61AA | -1.5594597 | 0.0025433 |
| UniRef90_UPI000C9E384A | -1.6019248 | 0.0025533 |
| UniRef90_UPI000643CC37 | -1.6127391 | 0.0025572 |
| UniRef90_A0A218V149 | -1.6310966 | 0.0045602 |
| UniRef90_T1JAH4 | -1.6456789 | 0.0025722 |
| UniRef90_A0A2K5P9A7 | -1.6551043 | 0.0001991 |
| UniRef50_P00395: Cytochrome c oxidase subunit 1 | -1.6793171 | 0.0029821 |
| UniRef90_A0A0B1SRV1 | -1.7092819 | 0.0035622 |
| UniRef50_P00414: Cytochrome c oxidase subunit 3 | -1.7130793 | 0.0027691 |
| UniRef90_A0A3B4CZH6 | -1.7158017 | 0.0003610 |
| UniRef90_A0A3B4WM40 | -1.7254403 | 0.0033884 |
| UniRef90_A0A1I7VUF0 | -1.7645164 | 0.0002052 |
| UniRef50_P03886: NADH-ubiquinone oxidoreductase chain 1 | -1.7747460 | 0.0030658 |
| UniRef90_A0A0N6W1Y9 | -1.7904823 | 0.0036262 |
| UniRef90_UPI000763751A | -1.8263882 | 0.0024975 |
| UniRef90_A0A0K0JA80 | -1.8470175 | 0.0001500 |
| UniRef90_F0YMZ5 | -1.8522387 | 0.0014339 |
| UniRef90_Q8HNQ2 | -1.8575767 | 0.0064046 |
| UniRef90_A7S708 | -1.8682428 | 0.0013884 |
| UniRef90_UPI000CE634A6 | -1.8721491 | 0.0006919 |
| UniRef90_UPI0007B848D5 | -1.9045456 | 0.0012979 |
| UniRef90_UPI0008FA2BF1 | -1.9189624 | 0.0021113 |
| UniRef90_A0A0N7BB96 | -1.9648185 | 0.0005051 |
| UniRef90_A0A2D4BWI6 | -1.9757272 | 0.0012930 |
| UniRef90_A0A3B4X1B7 | -1.9803506 | 0.0013545 |
| UniRef50_A0A343H6K7 | -9.8148188 | 0.0000000 |
| UniRef90_D8UR72 | -9.8382223 | 0.0000034 |
| UniRef90_A0A2K5Q2B0 | -9.8683093 | 0.0099186 |
| UniRef50_Q96LU0: CDNA FLJ25069 fis, clone CBL05145 | -9.8742796 | 0.0000000 |
| UniRef90_A0A1N5F4A1 | -9.9499290 | 0.0000001 |
| UniRef90_F2QFF2 | -9.9505363 | 0.0000002 |
| UniRef90_F9Q035 | -9.9659023 | 0.0000103 |
| UniRef90_F0IU43 | -9.9688900 | 0.0000010 |
| UniRef90_W1XIL2 | -9.9729787 | 0.0058006 |
| UniRef90_F6QR35 | -9.9777634 | 0.0000000 |
| UniRef90_U7V619 | -9.9961323 | 0.0000000 |
| UniRef90_F9LUN9 | -10.0050446 | 0.0000001 |
| UniRef90_F3SL59 | -10.0249965 | 0.0000021 |
| UniRef90_M0QXE4 | -10.0253680 | 0.0000000 |
| UniRef90_UPI00045E37E4 | -10.0579599 | 0.0000006 |
| UniRef90_A8AZT8 | -10.1081340 | 0.0000011 |
| UniRef50_W1XIL2 | -10.1105605 | 0.0049409 |
| UniRef90_A0A059RKT2 | -10.1461038 | 0.0098084 |
| UniRef50_A0A0D9S931 | -10.1515885 | 0.0000001 |
| UniRef90_E0PNM2 | -10.2043464 | 0.0000000 |
| UniRef90_B1IBW4 | -10.2046745 | 0.0000044 |
| UniRef90_Q9N083 | -10.2786860 | 0.0070558 |
| UniRef90_W1VR94 | -10.2825608 | 0.0000010 |
| UniRef90_F0FH66 | -10.2895807 | 0.0000012 |
| UniRef90_A0A287B4J8 | -10.3103464 | 0.0000000 |
| UniRef90_A0A0D9S5U8 | -10.3110844 | 0.0000005 |
| UniRef90_Q8WZ39 | -10.3357997 | 0.0000000 |
| UniRef90_A0A287AUW5 | -10.3480753 | 0.0000000 |
| UniRef90_L8EAJ4 | -10.4081280 | 0.0000004 |
| UniRef90_F9Q2A0 | -10.4147292 | 0.0050472 |
| UniRef90_U7V0Y2 | -10.5197579 | 0.0095404 |
| UniRef90_UPI0005F4E7EF | -10.5496471 | 0.0000000 |
| UniRef90_A0JZ85 | -10.7101199 | 0.0000009 |
| UniRef90_G0PP10 | -10.7435601 | 0.0089581 |
| UniRef90_E8JSH6 | -10.9159656 | 0.0000005 |
| UniRef90_I1ZM31 | -10.9648887 | 0.0061977 |
| UniRef90_A0A0F3HB42 | -10.9839635 | 0.0000139 |
| UniRef90_F0IIW6 | -11.1665854 | 0.0000000 |
| UniRef90_Q9H7M3 | -11.3754229 | 0.0000000 |
| UniRef50_UPI00045E37E4: PREDICTED: protein GVQW1-like, partial | -11.4294979 | 0.0000000 |
| UniRef90_A0A0P6A737 | -11.4848805 | 0.0000724 |
| UniRef90_B2YI96 | -11.6707221 | 0.0032838 |
| UniRef90_A3CNB2 | -11.7980256 | 0.0097986 |
| UniRef90_W1XIF5 | -12.1070340 | 0.0075653 |
| UniRef50_W1XIF5 | -12.1070340 | 0.0075653 |
| UniRef90_A0A1X1G532 | -12.3770215 | 0.0056662 |
| UniRef90_I7G8A4 | -12.4250648 | 0.0074428 |
| UniRef90_P67530 | -12.8829662 | 0.0084962 |
| UniRef90_A0A139QR72 | -13.1785192 | 0.0049855 |
| UniRef90_A5MC70 | -14.3987371 | 0.0000012 |
Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.
blbe3 = genes.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe3 = blbe3[blbe3['Blood_Saliva_euk_p'] < 0.01]
blbe3 = blbe3.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe3 = pd.concat([blbe3.head(n=50), blbe3.tail(n=50)])
blbe3 = blbe3.groupby(by=blbe3.index, axis=0).mean()
blbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef90_Q5LXM5 | 15.340131 | 0.0000004 |
| UniRef90_H2P484 | 14.073019 | 0.0000000 |
| UniRef90_A0A2I1TTC0 | 13.898148 | 0.0000002 |
| UniRef50_A0A146PTM6 | 13.108306 | 0.0000006 |
| UniRef50_Q9H7M3: FLJ00047 protein (Fragment) | 13.068202 | 0.0000000 |
| UniRef90_A0A2K6THT0 | 13.056995 | 0.0000000 |
| UniRef90_Q96NR6 | 12.873614 | 0.0000000 |
| UniRef90_H2P861 | 12.697271 | 0.0094192 |
| UniRef90_D3IDX3 | 12.583876 | 0.0000000 |
| UniRef90_J7TUV6 | 12.523060 | 0.0000054 |
| UniRef90_E4LCZ2 | 12.367192 | 0.0000048 |
| UniRef50_Q9NRJ1: Protein MOST-1 | 12.358124 | 0.0000000 |
| UniRef90_H2PFT0 | 12.204896 | 0.0000000 |
| UniRef50_A0A174IYG9 | 12.163148 | 0.0000000 |
| UniRef90_A0A174IYG9 | 12.072377 | 0.0000000 |
| UniRef90_A7AJP8 | 12.042644 | 0.0000116 |
| UniRef90_I7GKC4 | 12.036674 | 0.0000000 |
| UniRef90_A5ZLI5 | 12.032373 | 0.0000000 |
| UniRef90_D9RRC6 | 12.015198 | 0.0001089 |
| UniRef90_A0A2J8UEK6 | 11.960983 | 0.0000000 |
| UniRef90_D6KM31 | 11.925676 | 0.0000002 |
| UniRef90_W1V953 | 11.870992 | 0.0000001 |
| UniRef90_G5SMU9 | 11.813347 | 0.0000027 |
| UniRef50_V8CH43 | 11.796140 | 0.0000000 |
| UniRef90_A0A134C920 | 11.745783 | 0.0000131 |
| UniRef90_D5EYY8 | 11.715933 | 0.0000000 |
| UniRef90_A0A096C1X8 | 11.679717 | 0.0000002 |
| UniRef90_A0A2D3LM49 | 11.661013 | 0.0000001 |
| UniRef90_C9MQ90 | 11.624374 | 0.0000020 |
| UniRef50_F8WAV8: Interleukin-20 receptor subunit beta | 11.542661 | 0.0096313 |
| UniRef90_A0A1B1IC24 | 11.528934 | 0.0000000 |
| UniRef90_H2NVN9 | 11.515563 | 0.0000000 |
| UniRef90_A7UXY6 | 11.501784 | 0.0000510 |
| UniRef50_W1W9S2 | 11.494030 | 0.0000000 |
| UniRef50_B4DHF1: cDNA FLJ61749 | 11.482711 | 0.0000000 |
| UniRef90_X1H491 | 11.475303 | 0.0000000 |
| UniRef90_Q8A499 | 11.471490 | 0.0000002 |
| UniRef50_A0A1C5TMP4 | 11.457822 | 0.0000003 |
| UniRef50_A5ZLI4 | 11.394738 | 0.0000150 |
| UniRef50_W1WLZ3 | 11.389933 | 0.0000005 |
| UniRef90_W1WLZ3 | 11.379954 | 0.0000005 |
| UniRef90_A7B8Q2 | 11.307113 | 0.0000353 |
| UniRef90_U2Z4S8 | 11.302840 | 0.0000896 |
| UniRef90_D3IN37 | 11.291941 | 0.0000001 |
| UniRef90_E4LBK5 | 11.270987 | 0.0000130 |
| UniRef50_A0A2K0XJM9 | 11.260841 | 0.0000022 |
| UniRef90_J7TNS4 | 11.257366 | 0.0000327 |
| UniRef90_U2J131 | 11.235496 | 0.0000000 |
| UniRef90_Q9NRJ1 | 11.232382 | 0.0000000 |
| UniRef90_G1VAR6 | 11.231031 | 0.0000015 |
| UniRef50_UPI000B3F59CB | -1.190865 | 0.0056979 |
| UniRef50_W5P5C7 | -1.205205 | 0.0067865 |
| UniRef90_UPI000732AD10 | -1.212379 | 0.0075341 |
| UniRef90_UPI000847C819 | -1.231627 | 0.0075318 |
| UniRef90_Q6ZRZ7 | -1.236436 | 0.0089660 |
| UniRef50_A0A1A9ASM8 | -1.241270 | 0.0014614 |
| UniRef90_A0A1A9ASM8 | -1.241270 | 0.0014614 |
| UniRef90_P30050 | -1.265497 | 0.0052068 |
| UniRef90_A0A2U2W3H2 | -1.283754 | 0.0023887 |
| UniRef90_H2N8Q4 | -1.331225 | 0.0064317 |
| UniRef50_G1QSA7 | -1.364410 | 0.0057139 |
| UniRef50_P08865: 40S ribosomal protein SA | -1.373165 | 0.0005701 |
| UniRef50_G3SIN0 | -1.401976 | 0.0048580 |
| UniRef50_UPI000EF294EE | -1.416099 | 0.0024257 |
| UniRef90_UPI000EF294EE | -1.420442 | 0.0018108 |
| UniRef50_A0A1A9ANW3 | -1.447717 | 0.0006089 |
| UniRef90_F7HQ61 | -1.489799 | 0.0076729 |
| UniRef90_A0A2K5HHY8 | -1.493760 | 0.0015008 |
| UniRef50_F8VYN5: Heterogeneous nuclear ribonucleoprotein A1 (Fragment) | -1.517292 | 0.0016908 |
| UniRef50_UPI0008F51C75 | -1.528115 | 0.0062973 |
| UniRef90_A0A1A9ANW3 | -1.528735 | 0.0002737 |
| UniRef90_UPI000D0A1051 | -1.533556 | 0.0069270 |
| UniRef90_Q9Z154 | -1.557043 | 0.0011519 |
| UniRef90_G7P1P7 | -1.567828 | 0.0035882 |
| UniRef50_A0A2U2VZN6 | -1.582218 | 0.0030915 |
| UniRef90_G7MP19 | -1.585940 | 0.0006413 |
| UniRef90_X1UIZ8 | -1.638382 | 0.0007095 |
| UniRef90_D6RAT0 | -1.684800 | 0.0063811 |
| UniRef90_A0A2U2VZN6 | -1.704860 | 0.0029642 |
| UniRef90_UPI000C313812 | -1.740306 | 0.0041819 |
| UniRef50_P03923: NADH-ubiquinone oxidoreductase chain 6 | -1.763740 | 0.0071359 |
| UniRef90_UPI0009A8E1A0 | -1.822238 | 0.0065870 |
| UniRef90_P61247 | -1.861389 | 0.0024973 |
| UniRef50_P03905: NADH-ubiquinone oxidoreductase chain 4 | -1.891614 | 0.0069146 |
| UniRef90_P84040 | -2.017721 | 0.0076135 |
| UniRef90_J9NRV0 | -2.017721 | 0.0076135 |
| UniRef90_A0A2M4CW35 | -2.018112 | 0.0076281 |
| UniRef90_R4G555 | -2.114032 | 0.0076415 |
| UniRef90_F7A9Z7 | -2.117455 | 0.0023078 |
| UniRef90_P03923 | -2.134030 | 0.0074298 |
| UniRef90_S9X3V4 | -2.265461 | 0.0057090 |
| UniRef50_H2PTC1 | -2.328117 | 0.0060452 |
| UniRef90_X1FPE0 | -2.522721 | 0.0098859 |
| UniRef90_A0A343KLL1 | -2.580423 | 0.0039726 |
| UniRef90_H2PTC1 | -2.605292 | 0.0060061 |
| UniRef90_B4YG13 | -2.692092 | 0.0043643 |
| UniRef90_A0A343FX78 | -2.700931 | 0.0030189 |
| UniRef90_Q99878 | -3.176581 | 0.0044857 |
| UniRef90_F6TZJ6 | -3.647140 | 0.0060113 |
| UniRef90_A0A059UGD1 | -6.564523 | 0.0053080 |
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
blse3 = genes.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse3 = blse3[blse3['Blood_Buccal_euk_p'] < 0.01]
blse3 = blse3.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse3 = pd.concat([blse3.head(n=50), blse.tail(n=50)])
blse3 = blse3.groupby(by=blse3.index, axis=0).mean()
blse3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| UniRef90_H2NL54 | 12.134172 | 0.0000001 |
| UniRef90_I7GKC4 | 11.766724 | 0.0000025 |
| UniRef90_A0A2B5ASZ2 | 11.486150 | 0.0000014 |
| UniRef90_B3KRW9 | 11.399952 | 0.0000111 |
| UniRef50_D3DPD0: HCG1652138, isoform CRA_a | 11.389117 | 0.0000000 |
| UniRef90_G7P4E9 | 11.289988 | 0.0006329 |
| UniRef90_UPI0004F0B482 | 11.183212 | 0.0000001 |
| UniRef50_UPI000CEFC57C | 11.085708 | 0.0000004 |
| UniRef90_UPI000CEFC57C | 11.069592 | 0.0000013 |
| UniRef90_G7NJR0 | 10.880698 | 0.0088362 |
| UniRef90_Q04ML4 | 10.766447 | 0.0003120 |
| UniRef90_A0A2K6KDZ4 | 10.696823 | 0.0069987 |
| UniRef90_Q8DRE1 | 10.693464 | 0.0000025 |
| UniRef90_A0A2I3MMY7 | 10.331505 | 0.0000001 |
| UniRef90_H2NUU9 | 10.040391 | 0.0000016 |
| UniRef50_Q6ZRI4: cDNA FLJ46339 fis, clone TESTI4046450 | 9.991525 | 0.0000027 |
| UniRef90_Q6ZRI4 | 9.991525 | 0.0000027 |
| UniRef90_F8WAV8 | 9.987112 | 0.0000004 |
| UniRef90_H3BUS0 | 9.970033 | 0.0000073 |
| UniRef50_Q9UI52: HCG1818783 | 9.892900 | 0.0000113 |
| UniRef90_G7N124 | 9.818158 | 0.0000000 |
| UniRef50_UPI000B7B7569 | 9.788402 | 0.0000023 |
| UniRef90_A0A1D5R7R1 | 9.722046 | 0.0000000 |
| UniRef50_W1W9S2 | 9.691685 | 0.0000205 |
| UniRef90_A0A0F0CDV5 | 9.626875 | 0.0002394 |
| UniRef50_A0A0F0CDV5 | 9.626875 | 0.0002504 |
| UniRef50_H2PP79 | 9.549300 | 0.0000001 |
| UniRef90_H2PP79 | 9.549300 | 0.0000001 |
| UniRef50_Q6ZSN1 | 9.538742 | 0.0000425 |
| UniRef50_M0R171: Zinc finger protein 468 | 9.437993 | 0.0000017 |
| UniRef90_M0R171 | 9.437993 | 0.0000017 |
| UniRef90_J7SHX6 | 9.308914 | 0.0001051 |
| UniRef90_H2NBN2 | 9.264875 | 0.0000002 |
| UniRef50_Q02878: 60S ribosomal protein L6 | 9.131735 | 0.0000003 |
| UniRef90_UPI0005F40944 | 9.100331 | 0.0000002 |
| UniRef50_UPI0005F40944 | 9.100331 | 0.0000002 |
| UniRef90_A0A2I3LMT4 | 9.093173 | 0.0000927 |
| UniRef90_A0A2I3RVA0 | 9.055319 | 0.0000000 |
| UniRef90_A0A1A9AMQ3 | 9.050824 | 0.0000030 |
| UniRef50_A0A1A9AMQ3 | 9.050824 | 0.0000030 |
| UniRef50_P05141: ADP/ATP translocase 2 | 9.033386 | 0.0000077 |
| UniRef90_Q6ZSN1 | 8.997338 | 0.0000172 |
| UniRef90_Q6ZVI0 | 8.957761 | 0.0000002 |
| UniRef50_Q6ZVI0: cDNA FLJ42559 fis, clone BRACE3006226 | 8.957761 | 0.0000002 |
| UniRef50_B4E3R2: cDNA FLJ59085, highly similar to Polycystin-1 | 8.865063 | 0.0000008 |
| UniRef90_A0A2I2ZDD7 | 8.860090 | 0.0000007 |
| UniRef50_L8E7P6: Alternative protein FLG | 8.851721 | 0.0000049 |
| UniRef90_A0A062X488 | 8.844600 | 0.0000186 |
| UniRef50_A0A062X488 | 8.844600 | 0.0000280 |
| UniRef90_S4R460 | 8.773151 | 0.0000000 |
| UniRef50_I7G8L2: Macaca fascicularis brain cDNA clone: QmoA-11833, similar to human chemokine (C-C motif) ligand 5 (CCL5), mRNA, RefSeq: NM_002985.2 | -3.057083 | 0.0044197 |
| UniRef50_UPI000273D3DB: PREDICTED: histone demethylase UTY-like | -3.058105 | 0.0006568 |
| UniRef50_X1RTY3: Marine sediment metagenome DNA, contig: S12H4_L05131 | -3.088100 | 0.0001320 |
| UniRef50_Q6ZNX4: CDNA FLJ26942 fis, clone RCT07464 | -3.088863 | 0.0000248 |
| UniRef50_Q9BZ60: FKSG63 | -3.094164 | 0.0043170 |
| UniRef50_H2PBI0 | -3.194845 | 0.0050537 |
| UniRef50_Q8NF05: FLJ00399 protein (Fragment) | -3.195422 | 0.0017753 |
| UniRef50_G3RYZ2 | -3.202026 | 0.0017820 |
| UniRef50_Q1W209: Embryonic stem cell-related gene protein | -3.298909 | 0.0040440 |
| UniRef50_UPI00045E37E4: PREDICTED: protein GVQW1-like, partial | -3.521571 | 0.0021273 |
| UniRef50_F6TJB2 | -3.599640 | 0.0073896 |
| UniRef50_W1YJZ2 | -3.669211 | 0.0000867 |
| UniRef50_F7GRN4 | -3.714752 | 0.0009200 |
| UniRef50_Q6ZUG4: cDNA FLJ43741 fis, clone TESTI2017727 | -3.918750 | 0.0026472 |
| UniRef50_Q6FG63: FLJ10385 protein | -4.086130 | 0.0029433 |
| UniRef50_W1WD51 | -4.129838 | 0.0000402 |
| UniRef50_O95662: KpnI repetitive sequence (T-betaG41) 3kb downstream of beta-globin protein (Fragment) | -4.204094 | 0.0015056 |
| UniRef50_Q8WY51: HC6 | -4.225498 | 0.0064566 |
| UniRef50_S8BUY4 | -4.259124 | 0.0000409 |
| UniRef50_F6S8D7 | -4.301492 | 0.0058294 |
| UniRef50_Q8N329: C3orf64 protein | -4.310691 | 0.0051846 |
| UniRef50_I7G4S6: Macaca fascicularis brain cDNA clone: QbsB-10410, similar to human hypothetical protein FLJ20457 (FLJ20457), mRNA, RefSeq: NM_017832.2 | -4.419854 | 0.0067237 |
| UniRef50_F7GW59 | -4.437119 | 0.0000852 |
| UniRef50_Q8N287: cDNA FLJ33669 fis, clone BRAMY2028740 | -4.463438 | 0.0061385 |
| UniRef50_W1V5A8 | -4.529869 | 0.0000501 |
| UniRef50_Q6ZR97: cDNA FLJ46534 fis, clone THYMU3037052, weakly similar to Homo sapiens HIV TAT specific factor 1 (HTATSF1) | -4.612080 | 0.0042394 |
| UniRef50_UPI000292A7A2 | -4.755766 | 0.0050788 |
| UniRef50_G2HH39: Zinc finger protein 580 | -4.874037 | 0.0066219 |
| UniRef50_F7HM41 | -5.239724 | 0.0041429 |
| UniRef50_G9L4Y9 | -5.290411 | 0.0077388 |
| UniRef50_F7ERF9 | -5.464497 | 0.0084036 |
| UniRef50_UPI0001AF4819: hypothetical protein | -5.485818 | 0.0013148 |
| UniRef50_I7G402: Macaca fascicularis brain cDNA clone: QbsA-12013, similar to human glutamate receptor, metabotropic 6 (GRM6), mRNA, RefSeq: NM_000843.2 | -5.510659 | 0.0026030 |
| UniRef50_G7PG11 | -5.567385 | 0.0067932 |
| UniRef50_Q96LU0: CDNA FLJ25069 fis, clone CBL05145 | -5.625021 | 0.0047341 |
| UniRef50_Q9UI61: PRO0470 | -5.839999 | 0.0088171 |
| UniRef50_Q9GMT0 | -5.905245 | 0.0092684 |
| UniRef50_Q6ZNZ6: CDNA FLJ26821 fis, clone PRS06629 | -5.952807 | 0.0070262 |
| UniRef50_M3Z9H5 | -6.138953 | 0.0025770 |
| UniRef50_F7CR38 | -6.162863 | 0.0077810 |
| UniRef50_X6R7Y7: Intraflagellar transport protein 25 homolog | -6.496334 | 0.0059577 |
| UniRef50_K7EJ15: Ankyrin repeat domain-containing protein 20A3 (Fragment) | -6.986405 | 0.0002879 |
| UniRef50_F6ZQ59 | -7.076242 | 0.0070378 |
| UniRef50_Q4XHH2 | -7.100608 | 0.0066861 |
| UniRef50_Q8N210: Retbindin | -7.152420 | 0.0042904 |
| UniRef50_H7BYS4 | -7.256297 | 0.0030867 |
| UniRef50_Q4Y9P0 | -7.638074 | 0.0045271 |
| UniRef50_Q8WTZ3: Zinc finger protein ENSP00000375192 | -8.305599 | 0.0091100 |
| UniRef50_S9WFI9 | -9.303851 | 0.0000001 |
| UniRef50_S4L0C2 | -9.774147 | 0.0042497 |
For each of these, I am initially filtering out to only P values that are below 0.01, and then am taking only the most up- or down-regulated KEGG orthologs and printing them to a table.
For HUMAnN2 I am now looking at the KEGG orthologs assigned to the Uniref50 database, as above for the gene families.
kos = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/genes_50_ko.csv', header=0, index_col=0)
kegg_list = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kegg_list.csv', header=0, index_col=0)
kegg_list.drop_duplicates(inplace=True)
kegg_dict = {}
kl = list(kegg_list.index.values)
for ko in kl:
kegg_dict[ko] = ko+' '+kegg_list.loc[ko, 'Product']
kos = kos.rename(index=kegg_dict)
Negative fold changes are higher in saliva samples while positive are higher in blood samples.
bs = kos.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs = bs[bs['Saliva_Blood_p'] < 0.01]
bs = bs.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs = pd.concat([bs.head(n=50), bs.tail(n=50)])
bs.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K04496 CTBP; C-terminal binding protein | 1.9334599 | 0.0002204 |
| K02866 RP-L10e, RPL10; large subunit ribosomal protein L10e | 1.7897436 | 0.0028306 |
| K02268 COX6C; cytochrome c oxidase subunit 6c | 1.7510718 | 0.0067512 |
| K02934 RP-L6e, RPL6; large subunit ribosomal protein L6e | 1.4908265 | 0.0024136 |
| K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 | 1.3915885 | 0.0019331 |
| K05414 IFNA; interferon alpha | 1.1521229 | 0.0033957 |
| K12741 HNRNPA1_3; heterogeneous nuclear ribonucleoprotein A1/A3 | 1.0346679 | 0.0038171 |
| K19469 FTO; mRNA N6-methyladenine demethylase | 1.0320411 | 0.0044718 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | -0.7506043 | 0.0069142 |
| K02998 RP-SAe, RPSA; small subunit ribosomal protein SAe | -1.0165759 | 0.0064599 |
| K08961 K08961; chondroitin-sulfate-ABC endolyase/exolyase | -1.0460080 | 0.0061217 |
| K09228 KRAB; KRAB domain-containing zinc finger protein | -1.1464222 | 0.0062389 |
| K02870 RP-L12e, RPL12; large subunit ribosomal protein L12e | -1.1857902 | 0.0036332 |
| K12567 TTN; titin | -1.3149551 | 0.0000317 |
| K14217 IFIT1; interferon-induced protein with tetratricopeptide repeats 1 | -1.3358230 | 0.0021426 |
| K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit | -1.3438118 | 0.0008233 |
| K09671 CHST6; carbohydrate 6-sulfotransferase 6 | -1.6336278 | 0.0018200 |
| K16741 ALMS1; alstrom syndrome protein 1 | -1.7043058 | 0.0010936 |
| K04746 CHST4; carbohydrate 6-sulfotransferase 4 | -1.7395004 | 0.0003520 |
| K15613 MEIS1; homeobox protein Meis1 | -1.7553549 | 0.0099416 |
| K06122 dhbE; glycerol dehydratase small subunit | -1.8117034 | 0.0010157 |
| K11480 AURKC; aurora kinase C | -1.9882583 | 0.0009205 |
| K03096 FRAT2; frequently rearranged in advanced T-cell lymphomas 2 | -2.0489115 | 0.0012559 |
| K08026 ONECUT1, HNF6; one cut domain, family member 1, hepatocyte nuclear factor 6 | -2.0718133 | 0.0019616 |
| K10054 PML, TRIM19; probable transcription factor PML | -2.1362750 | 0.0016119 |
| K16496 PCDHGB; protocadherin gamma subfamily B | -2.1441586 | 0.0000246 |
| K16670 MEIS2; homeobox protein Meis2 | -2.1545379 | 0.0002416 |
| K04011 CR1, CD35; complement component (3b/4b) receptor 1 | -2.1798510 | 0.0047209 |
| K09370 ISL1; insulin gene enhancer protein ISL-1 | -2.2417383 | 0.0040429 |
| K03006 RPB1, POLR2A; DNA-directed RNA polymerase II subunit RPB1 | -2.2887214 | 0.0023002 |
| K12864 CTNNBL1; beta-catenin-like protein 1 | -2.2988227 | 0.0098203 |
| K03065 PSMC3, RPT5; 26S proteasome regulatory subunit T5 | -2.2994769 | 0.0014369 |
| K08610 ADAM21; disintegrin and metalloproteinase domain-containing protein 21 | -2.3529345 | 0.0000961 |
| K02329 FZD3; frizzled 3 | -2.3883758 | 0.0034076 |
| K17778 TIM10; mitochondrial import inner membrane translocase subunit TIM10 | -2.4172144 | 0.0085489 |
| K07606 VIM; vimentin | -2.4243389 | 0.0047630 |
| K10455 KLHL18; kelch-like protein 18 | -2.4673402 | 0.0071273 |
| K09387 FOXA1, HNF3A; forkhead box protein A1, hepatocyte nuclear factor 3-alpha | -2.4945583 | 0.0023481 |
| K16586 HAUS3; HAUS augmin-like complex subunit 3 | -2.4967870 | 0.0010657 |
| K04914 KCNK3, K2P3.1; potassium channel subfamily K member 3 | -2.5433417 | 0.0012574 |
| K12477 EHD4; EH domain-containing protein 4 | -2.5433417 | 0.0008572 |
| K12653 NLRX1; NLR family member X1 | -2.5547055 | 0.0065685 |
| K08103 HS6ST3; heparan sulfate 6-O-sulfotransferase HS6ST3 | -2.5828174 | 0.0002684 |
| K11182 AOC1, ABP1; diamine oxidase | -2.5835284 | 0.0002678 |
| K18727 TNPO2, IPO3, KPNB2B; transportin-2 | -2.5994545 | 0.0000150 |
| K13981 AKR1E2, AKR1CL2; 1,5-anhydro-D-fructose reductase | -2.6269843 | 0.0001921 |
| K08527 RARA, NR1B1; retinoic acid receptor alpha | -2.6342905 | 0.0020800 |
| K04813 CHRNB2; nicotinic acetylcholine receptor beta-2 | -2.6342905 | 0.0000155 |
| K17709 CYP2B6; cytochrome P450 family 2 subfamily B polypeptide 6 | -2.6435049 | 0.0038909 |
| K08528 RARB, NR1B2; retinoic acid receptor beta | -2.6542668 | 0.0000447 |
| K02867 RP-L11, MRPL11, rplK; large subunit ribosomal protein L11 | -14.2369610 | 0.0000000 |
| K02968 RP-S20, rpsT; small subunit ribosomal protein S20 | -14.2403147 | 0.0000000 |
| K02871 RP-L13, MRPL13, rplM; large subunit ribosomal protein L13 | -14.2509766 | 0.0000000 |
| K02895 RP-L24, MRPL24, rplX; large subunit ribosomal protein L24 | -14.2587624 | 0.0000000 |
| K02015 ABC.FEV.P; iron complex transport system permease protein | -14.2605983 | 0.0000000 |
| K02888 RP-L21, MRPL21, rplU; large subunit ribosomal protein L21 | -14.2628930 | 0.0000000 |
| K02874 RP-L14, MRPL14, rplN; large subunit ribosomal protein L14 | -14.2631149 | 0.0000000 |
| K02890 RP-L22, MRPL22, rplV; large subunit ribosomal protein L22 | -14.2878814 | 0.0000000 |
| K02935 RP-L7, MRPL12, rplL; large subunit ribosomal protein L7/L12 | -14.2943955 | 0.0000000 |
| K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 | -14.2999624 | 0.0000000 |
| K02994 RP-S8, rpsH; small subunit ribosomal protein S8 | -14.3026847 | 0.0000000 |
| K03496 parA, soj; chromosome partitioning protein | -14.3028272 | 0.0000000 |
| K02110 ATPF0C, atpE; F-type H+-transporting ATPase subunit c | -14.3061261 | 0.0000000 |
| K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 | -14.3396335 | 0.0013182 |
| K02990 RP-S6, MRPS6, rpsF; small subunit ribosomal protein S6 | -14.3455530 | 0.0000000 |
| K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 | -14.3564629 | 0.0000000 |
| K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 | -14.3618646 | 0.0000000 |
| K02876 RP-L15, MRPL15, rplO; large subunit ribosomal protein L15 | -14.3672130 | 0.0000000 |
| K02996 RP-S9, MRPS9, rpsI; small subunit ribosomal protein S9 | -14.3718180 | 0.0000000 |
| K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial | -14.3725317 | 0.0000000 |
| K07496 K07496; putative transposase | -14.3817416 | 0.0000090 |
| K02013 ABC.FEV.A; iron complex transport system ATP-binding protein | -14.3830435 | 0.0000000 |
| K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 | -14.3892942 | 0.0000000 |
| K02952 RP-S13, rpsM; small subunit ribosomal protein S13 | -14.3917295 | 0.0000000 |
| K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 | -14.4148047 | 0.0000000 |
| K02904 RP-L29, rpmC; large subunit ribosomal protein L29 | -14.4183841 | 0.0000000 |
| K02965 RP-S19, rpsS; small subunit ribosomal protein S19 | -14.4973970 | 0.0000000 |
| K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 | -14.5614245 | 0.0000000 |
| K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 | -14.5673952 | 0.0000000 |
| K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 | -14.5759170 | 0.0000000 |
| K02003 ABC.CD.A; putative ABC transport system ATP-binding protein | -14.5781923 | 0.0000000 |
| K02909 RP-L31, rpmE; large subunit ribosomal protein L31 | -14.6157375 | 0.0000000 |
| K02946 RP-S10, MRPS10, rpsJ; small subunit ribosomal protein S10 | -14.6386369 | 0.0000000 |
| K07024 SPP; sucrose-6-phosphatase | -14.6485644 | 0.0000000 |
| K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 | -14.6563031 | 0.0013393 |
| K02961 RP-S17, MRPS17, rpsQ; small subunit ribosomal protein S17 | -14.7600819 | 0.0004679 |
| K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 | -14.7802134 | 0.0000000 |
| K03530 hupB; DNA-binding protein HU-beta | -14.7807616 | 0.0000000 |
| K01990 ABC-2.A; ABC-2 type transport system ATP-binding protein | -14.8026705 | 0.0000000 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | -14.8843035 | 0.0000000 |
| K03073 secE; preprotein translocase subunit SecE | -14.8931547 | 0.0000000 |
| K02916 RP-L35, MRPL35, rpmI; large subunit ribosomal protein L35 | -14.9053602 | 0.0000000 |
| K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 | -14.9478983 | 0.0000000 |
| K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 | -14.9753640 | 0.0000000 |
| K02078 acpP; acyl carrier protein | -14.9758915 | 0.0000000 |
| K02518 infA; translation initiation factor IF-1 | -14.9954649 | 0.0000000 |
| K02907 RP-L30, MRPL30, rpmD; large subunit ribosomal protein L30 | -15.1068973 | 0.0000000 |
| K02911 RP-L32, MRPL32, rpmF; large subunit ribosomal protein L32 | -15.2205278 | 0.0000000 |
| K02914 RP-L34, MRPL34, rpmH; large subunit ribosomal protein L34 | -15.9546015 | 0.0000000 |
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | -17.7654733 | 0.0000000 |
Negative fold changes are higher in buccal samples while positive are higher in blood samples.
bb = kos.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb = bb[bb['Buccal_Blood_p'] < 0.01]
bb= bb.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb = pd.concat([bb.head(n=50), bb.tail(n=50)])
bb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K02866 RP-L10e, RPL10; large subunit ribosomal protein L10e | 1.499031 | 0.0055909 |
| K04496 CTBP; C-terminal binding protein | 1.270333 | 0.0013989 |
| K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 | -1.205975 | 0.0015199 |
| K00522 FTH1; ferritin heavy chain | -1.213633 | 0.0083241 |
| K02934 RP-L6e, RPL6; large subunit ribosomal protein L6e | -1.308519 | 0.0078647 |
| K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 | -1.367098 | 0.0044002 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -1.474114 | 0.0013476 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | -1.599817 | 0.0003537 |
| K02261 COX2; cytochrome c oxidase subunit 2 | -1.625205 | 0.0000490 |
| K02262 COX3; cytochrome c oxidase subunit 3 | -1.631428 | 0.0000390 |
| K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit | -1.657063 | 0.0000135 |
| K02256 COX1; cytochrome c oxidase subunit 1 | -1.714630 | 0.0000465 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | -1.784757 | 0.0005730 |
| K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L | -1.831291 | 0.0003196 |
| K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a | -2.060504 | 0.0026289 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | -2.431830 | 0.0000092 |
| K03767 PPIA; peptidyl-prolyl cis-trans isomerase A (cyclophilin A) | -2.792954 | 0.0059237 |
| K16496 PCDHGB; protocadherin gamma subfamily B | -2.898683 | 0.0000035 |
| K16495 PCDHGA; protocadherin gamma subfamily A | -3.011391 | 0.0002148 |
| K08610 ADAM21; disintegrin and metalloproteinase domain-containing protein 21 | -3.340978 | 0.0000233 |
| K16741 ALMS1; alstrom syndrome protein 1 | -3.356812 | 0.0001533 |
| K01940 argG, ASS1; argininosuccinate synthase | -3.527865 | 0.0010200 |
| K12406 PKLR; pyruvate kinase isozymes R/L | -3.572806 | 0.0000174 |
| K04290 S1PR3, EDG3; sphingosine 1-phosphate receptor 3 | -3.577329 | 0.0000749 |
| K07625 GJB6, CX30; gap junction beta-6 protein | -3.712632 | 0.0016658 |
| K18727 TNPO2, IPO3, KPNB2B; transportin-2 | -3.908811 | 0.0000002 |
| K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase | -3.959823 | 0.0003065 |
| K01753 dsdA; D-serine dehydratase | -3.974443 | 0.0000856 |
| K12437 pks13; polyketide synthase 13 | -4.015661 | 0.0011796 |
| K16145 MUC16, CA125; mucin-16 | -4.032076 | 0.0000001 |
| K04876 KCNA3, KV1.3; potassium voltage-gated channel Shaker-related subfamily A member 3 | -4.125433 | 0.0000001 |
| K16090 fiu; catecholate siderophore receptor | -4.160175 | 0.0000637 |
| K16653 dprE1; decaprenylphospho-beta-D-ribofuranose 2-oxidase | -4.174180 | 0.0000044 |
| K01667 tnaA; tryptophanase | -4.193504 | 0.0000065 |
| K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 | -4.261196 | 0.0000131 |
| K13183 DDX50; ATP-dependent RNA helicase DDX50 | -4.292635 | 0.0000056 |
| K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 | -4.350986 | 0.0000000 |
| K04879 KCNA6, KV1.6; potassium voltage-gated channel Shaker-related subfamily A member 6 | -4.368162 | 0.0000187 |
| K09393 TFDP3; transcription factor Dp-3 | -4.391917 | 0.0000011 |
| K16351 LRRC4, NGL2; netrin-G2 ligand | -4.459247 | 0.0000003 |
| K13922 pduP; propionaldehyde dehydrogenase | -4.464456 | 0.0000095 |
| K05988 dexA; dextranase | -4.466025 | 0.0007942 |
| K05757 ARPC1A_B; actin related protein 43892 complex, subunit 1A/1B | -4.492685 | 0.0000001 |
| K17391 IGF2BP1; insulin-like growth factor 2 mRNA-binding protein 1 | -4.543289 | 0.0000008 |
| K01805 xylA; xylose isomerase | -4.543446 | 0.0003544 |
| K15043 KPNA2_7; importin subunit alpha-1/8 | -4.551707 | 0.0000001 |
| K17582 MKI67; antigen KI-67 | -4.560015 | 0.0000000 |
| K19339 nosR; NosR/NirI family transcriptional regulator, nitrous oxide reductase regulator | -4.570831 | 0.0001472 |
| K11993 ACOT6; acyl-CoA thioesterase 6 | -4.572809 | 0.0000002 |
| K04901 KCNG2, KV6.2; potassium voltage-gated channel subfamily G member 2 | -4.579436 | 0.0000690 |
| K02952 RP-S13, rpsM; small subunit ribosomal protein S13 | -12.823884 | 0.0000020 |
| K02968 RP-S20, rpsT; small subunit ribosomal protein S20 | -12.840400 | 0.0000031 |
| K02036 pstB; phosphate transport system ATP-binding protein | -12.867276 | 0.0001099 |
| K03217 yidC, spoIIIJ, OXA1, ccfA; YidC/Oxa1 family membrane protein insertase | -12.880814 | 0.0000060 |
| K02834 rbfA; ribosome-binding factor A | -12.888706 | 0.0000052 |
| K12295 comE; two-component system, LytTR family, response regulator ComE | -12.903241 | 0.0001530 |
| K02003 ABC.CD.A; putative ABC transport system ATP-binding protein | -12.923195 | 0.0000004 |
| K02895 RP-L24, MRPL24, rplX; large subunit ribosomal protein L24 | -12.931376 | 0.0000003 |
| K16509 spxA; regulatory protein spx | -12.946255 | 0.0000188 |
| K02035 ABC.PE.S; peptide/nickel transport system substrate-binding protein | -12.951988 | 0.0000037 |
| K02994 RP-S8, rpsH; small subunit ribosomal protein S8 | -12.958845 | 0.0000050 |
| K02013 ABC.FEV.A; iron complex transport system ATP-binding protein | -12.970199 | 0.0000005 |
| K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 | -12.970403 | 0.0034532 |
| K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 | -12.978923 | 0.0000004 |
| K02440 GLPF; glycerol uptake facilitator protein | -12.985260 | 0.0000018 |
| K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 | -13.012473 | 0.0000001 |
| K03073 secE; preprotein translocase subunit SecE | -13.012813 | 0.0001316 |
| K02892 RP-L23, MRPL23, rplW; large subunit ribosomal protein L23 | -13.041780 | 0.0000012 |
| K02961 RP-S17, MRPS17, rpsQ; small subunit ribosomal protein S17 | -13.062567 | 0.0013966 |
| K03402 argR, ahrC; transcriptional regulator of arginine metabolism | -13.063615 | 0.0000080 |
| K02965 RP-S19, rpsS; small subunit ribosomal protein S19 | -13.092549 | 0.0000024 |
| K08998 K08998; uncharacterized protein | -13.100846 | 0.0000116 |
| K02946 RP-S10, MRPS10, rpsJ; small subunit ribosomal protein S10 | -13.106369 | 0.0000006 |
| K02529 lacI, galR; LacI family transcriptional regulator | -13.145471 | 0.0000210 |
| K03499 trkA, ktrA; trk system potassium uptake protein | -13.160580 | 0.0000018 |
| K02110 ATPF0C, atpE; F-type H+-transporting ATPase subunit c | -13.177140 | 0.0000005 |
| K02904 RP-L29, rpmC; large subunit ribosomal protein L29 | -13.188205 | 0.0000000 |
| K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 | -13.190505 | 0.0000043 |
| K02794 PTS-Man-EIIB, manX; PTS system, mannose-specific IIB component | -13.206069 | 0.0000440 |
| K02793 PTS-Man-EIIA, manX; PTS system, mannose-specific IIA component | -13.215231 | 0.0000194 |
| K02004 ABC.CD.P; putative ABC transport system permease protein | -13.275588 | 0.0000027 |
| K02015 ABC.FEV.P; iron complex transport system permease protein | -13.354928 | 0.0000008 |
| K02078 acpP; acyl carrier protein | -13.454275 | 0.0000016 |
| K03111 ssb; single-strand DNA-binding protein | -13.454335 | 0.0000044 |
| K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 | -13.486267 | 0.0000005 |
| K07052 K07052; uncharacterized protein | -13.508456 | 0.0000243 |
| K03574 mutT, NUDT15, MTH2; 8-oxo-dGTP diphosphatase | -13.527932 | 0.0000043 |
| K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 | -13.558267 | 0.0000014 |
| K02518 infA; translation initiation factor IF-1 | -13.721290 | 0.0000007 |
| K01756 purB, ADSL; adenylosuccinate lyase | -13.777987 | 0.0000075 |
| K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial | -13.779813 | 0.0000017 |
| K07024 SPP; sucrose-6-phosphatase | -13.822793 | 0.0000022 |
| K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 | -13.925475 | 0.0000017 |
| K02030 ABC.PA.S; polar amino acid transport system substrate-binding protein | -14.009529 | 0.0000147 |
| K07729 K07729; putative transcriptional regulator | -14.027926 | 0.0000088 |
| K02907 RP-L30, MRPL30, rpmD; large subunit ribosomal protein L30 | -14.061021 | 0.0000020 |
| K02911 RP-L32, MRPL32, rpmF; large subunit ribosomal protein L32 | -14.062830 | 0.0000173 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | -14.306074 | 0.0000008 |
| K01990 ABC-2.A; ABC-2 type transport system ATP-binding protein | -14.930515 | 0.0000006 |
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | -17.574972 | 0.0000496 |
Negative fold changes are higher in saliva samples while positive are higher in buccal samples.
sb = kos.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb = sb[sb['Saliva_Buccal_p'] < 0.01]
sb= sb.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb = pd.concat([sb.head(n=50), sb.tail(n=50)])
sb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K00689 E2.4.1.5; dextransucrase | 3.091796 | 0.0019816 |
| K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 | 1.964259 | 0.0000558 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | 1.884454 | 0.0000277 |
| K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a | 1.875469 | 0.0000097 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | 1.849453 | 0.0000343 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | 1.759213 | 0.0000154 |
| K03098 APOD; apolipoprotein D and lipocalin family protein | 1.757776 | 0.0060616 |
| K02256 COX1; cytochrome c oxidase subunit 1 | 1.705164 | 0.0000261 |
| K17769 TOM22; mitochondrial import receptor subunit TOM22 | 1.703909 | 0.0082306 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | 1.681226 | 0.0000629 |
| K02262 COX3; cytochrome c oxidase subunit 3 | 1.593552 | 0.0000505 |
| K10583 UBE2S, E2EPF; ubiquitin-conjugating enzyme E2 S | 1.572378 | 0.0040913 |
| K02261 COX2; cytochrome c oxidase subunit 2 | 1.554875 | 0.0000517 |
| K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 | 1.550412 | 0.0005111 |
| K02125 ATPeF08, MTATP8, ATP8; F-type H+-transporting ATPase subunit 8 | 1.548533 | 0.0001840 |
| K05757 ARPC1A_B; actin related protein 43892 complex, subunit 1A/1B | 1.379567 | 0.0069916 |
| K02941 RP-LP0, RPLP0; large subunit ribosomal protein LP0 | 1.328357 | 0.0083823 |
| K18727 TNPO2, IPO3, KPNB2B; transportin-2 | 1.309357 | 0.0021068 |
| K17565 PPP1R26; protein phosphatase 1 regulatory subunit 26 | 1.261023 | 0.0090406 |
| K02901 RP-L27e, RPL27; large subunit ribosomal protein L27e | 1.234855 | 0.0029414 |
| K18752 TNPO1, IPO2, KPNB2; transportin-1 | 1.110764 | 0.0085421 |
| K07766 E3.6.1.52; diphosphoinositol-polyphosphate diphosphatase | 1.096184 | 0.0037824 |
| K11247 SH3GL; endophilin-A | 1.045827 | 0.0009235 |
| K14455 GOT2; aspartate aminotransferase, mitochondrial | 1.026448 | 0.0025992 |
| K11993 ACOT6; acyl-CoA thioesterase 6 | -1.022016 | 0.0072266 |
| K16362 FLRT; leucine-rich repeat transmembrane protein FLRT | -1.032869 | 0.0036952 |
| K08961 K08961; chondroitin-sulfate-ABC endolyase/exolyase | -1.046008 | 0.0061217 |
| K02898 RP-L26e, RPL26; large subunit ribosomal protein L26e | -1.084513 | 0.0088971 |
| K04985 PKD1; polycystin 1 | -1.094280 | 0.0040461 |
| K02980 RP-S29e, RPS29; small subunit ribosomal protein S29e | -1.111203 | 0.0058887 |
| K15040 VDAC2; voltage-dependent anion channel protein 2 | -1.122858 | 0.0056760 |
| K03249 EIF3F; translation initiation factor 3 subunit F | -1.130201 | 0.0019101 |
| K02432 FZD1_7, fz; frizzled 44013 | -1.178607 | 0.0075719 |
| K07980 KIR3DL, CD158; killer cell immunoglobulin-like receptor 3DL | -1.211362 | 0.0058397 |
| K02872 RP-L13Ae, RPL13A; large subunit ribosomal protein L13Ae | -1.355498 | 0.0028612 |
| K02951 RP-S12e, RPS12; small subunit ribosomal protein S12e | -1.480073 | 0.0097971 |
| K02947 RP-S10e, RPS10; small subunit ribosomal protein S10e | -1.512282 | 0.0066140 |
| K02768 PTS-Fru-EIIA, fruB; PTS system, fructose-specific IIA component | -1.579825 | 0.0051358 |
| K05995 pepE; dipeptidase E | -1.601694 | 0.0030411 |
| K00163 aceE; pyruvate dehydrogenase E1 component | -1.671457 | 0.0040851 |
| K00368 nirK; nitrite reductase (NO-forming) | -1.695229 | 0.0048353 |
| K02770 PTS-Fru-EIIC, fruA; PTS system, fructose-specific IIC component | -1.727637 | 0.0041018 |
| K05589 ftsB; cell division protein FtsB | -1.735533 | 0.0059633 |
| K07670 mtrA; two-component system, OmpR family, response regulator MtrA | -1.762458 | 0.0023952 |
| K02769 PTS-Fru-EIIB, fruA; PTS system, fructose-specific IIB component | -1.772604 | 0.0034711 |
| K00611 OTC, argF, argI; ornithine carbamoyltransferase | -1.775059 | 0.0038423 |
| K03801 lipB; lipoyl(octanoyl) transferase | -1.783586 | 0.0045839 |
| K00058 serA, PHGDH; D-3-phosphoglycerate dehydrogenase / 2-oxoglutarate reductase | -1.787206 | 0.0037298 |
| K01207 nagZ; beta-N-acetylhexosaminidase | -1.801148 | 0.0042518 |
| K06122 dhbE; glycerol dehydratase small subunit | -1.811703 | 0.0010157 |
| K02421 fliR; flagellar biosynthetic protein FliR | -8.657679 | 0.0000195 |
| K04065 osmY; hyperosmotically inducible periplasmic protein | -8.666040 | 0.0000078 |
| K07590 RP-L7A, rplGB; large subunit ribosomal protein L7A | -8.682366 | 0.0000007 |
| K06603 flaG; flagellar protein FlaG | -8.684221 | 0.0000005 |
| K00413 CYC1, CYT1, petC; ubiquinol-cytochrome c reductase cytochrome c1 subunit | -8.691016 | 0.0000026 |
| K03090 sigB; RNA polymerase sigma-B factor | -8.702081 | 0.0037978 |
| K00662 aacC; aminoglycoside 3-N-acetyltransferase | -8.745173 | 0.0000019 |
| K15635 apgM; 2,3-bisphosphoglycerate-independent phosphoglycerate mutase | -8.749110 | 0.0006290 |
| K02389 flgD; flagellar basal-body rod modification protein FlgD | -8.751046 | 0.0042398 |
| K08984 yjdF; putative membrane protein | -8.792184 | 0.0019274 |
| K05298 GAPA; glyceraldehyde-3-phosphate dehydrogenase (NADP+) (phosphorylating) | -8.805830 | 0.0000300 |
| K04070 pflX; putative pyruvate formate lyase activating enzyme | -8.815048 | 0.0080126 |
| K16293 psrB; polysulfide reductase chain B | -8.819294 | 0.0000001 |
| K00282 gcvPA; glycine dehydrogenase subunit 1 | -8.826747 | 0.0064730 |
| K02390 flgE; flagellar hook protein FlgE | -8.867032 | 0.0001173 |
| K04835 mal; methylaspartate ammonia-lyase | -8.888296 | 0.0000142 |
| K13034 ATCYSC1; L-3-cyanoalanine synthase/ cysteine synthase | -8.910907 | 0.0007810 |
| K13652 K13652; AraC family transcriptional regulator | -8.933437 | 0.0000003 |
| K01173 ENDOG; endonuclease G, mitochondrial | -8.966743 | 0.0039007 |
| K02416 fliM; flagellar motor switch protein FliM | -8.984770 | 0.0002853 |
| K07080 K07080; uncharacterized protein | -8.989049 | 0.0000105 |
| K06992 K06992; uncharacterized protein | -9.020148 | 0.0009517 |
| K02499 yabN; tetrapyrrole methylase family protein / MazG family protein | -9.031013 | 0.0076265 |
| K09979 K09979; uncharacterized protein | -9.092154 | 0.0000000 |
| K18285 mqnE; aminodeoxyfutalosine synthase | -9.118999 | 0.0007298 |
| K02415 fliL; flagellar FliL protein | -9.138959 | 0.0000291 |
| K18828 mvpA, vapC; tRNA(fMet)-specific endonuclease VapC | -9.140282 | 0.0000054 |
| K01305 iadA; beta-aspartyl-dipeptidase (metallo-type) | -9.179806 | 0.0058965 |
| K01678 E4.2.1.2AB, fumB; fumarate hydratase subunit beta | -9.207153 | 0.0020642 |
| K02405 fliA; RNA polymerase sigma factor for flagellar operon FliA | -9.216848 | 0.0020283 |
| K02387 flgB; flagellar basal-body rod protein FlgB | -9.239752 | 0.0009116 |
| K03747 smg; Smg protein | -9.261973 | 0.0025450 |
| K11782 mqnA; chorismate dehydratase | -9.271650 | 0.0021359 |
| K16209 lacS, galP, rafP; lactose/raffinose/galactose permease | -9.366334 | 0.0000001 |
| K00176 korD, oorD; 2-oxoglutarate ferredoxin oxidoreductase subunit delta | -9.372137 | 0.0002304 |
| K06221 dkgA; 2,5-diketo-D-gluconate reductase A | -9.381740 | 0.0066407 |
| K02408 fliE; flagellar hook-basal body complex protein FliE | -9.424117 | 0.0000042 |
| K03412 cheB; two-component system, chemotaxis family, protein-glutamate methylesterase/glutaminase | -9.443875 | 0.0013220 |
| K00068 srlD; sorbitol-6-phosphate 2-dehydrogenase | -9.445283 | 0.0083816 |
| K02420 fliQ; flagellar biosynthetic protein FliQ | -9.450876 | 0.0018003 |
| K02526 kdgT; 2-keto-3-deoxygluconate permease | -9.459134 | 0.0028565 |
| K08641 vanX; zinc D-Ala-D-Ala dipeptidase | -9.583023 | 0.0028897 |
| K15034 yaeJ; ribosome-associated protein | -9.597670 | 0.0005718 |
| K05303 K05303; O-methyltransferase | -9.630205 | 0.0000002 |
| K08972 K08972; putative membrane protein | -9.675798 | 0.0000003 |
| K09121 larC; pyridinium-3,5-bisthiocarboxylic acid mononucleotide nickel chelatase | -9.742747 | 0.0067663 |
| K03413 cheY; two-component system, chemotaxis family, chemotaxis protein CheY | -9.925642 | 0.0025733 |
| K08999 K08999; uncharacterized protein | -10.578154 | 0.0019780 |
| K19165 phd; antitoxin Phd | -10.866440 | 0.0000000 |
| K09474 phoN; acid phosphatase (class A) | -11.094953 | 0.0001973 |
Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.
ss = kos.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss = ss[ss['Saliva_Saliva_euk_p'] < 0.01]
ss = ss.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss = pd.concat([ss.head(n=50), ss.tail(n=50)])
ss.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K18177 COA4; cytochrome c oxidase assembly factor 4 | 4.712745 | 0.0076997 |
| K10435 MAP1LC; microtubule-associated protein 1 light chain | 1.637279 | 0.0094501 |
| K00889 PIP5K; 1-phosphatidylinositol-4-phosphate 5-kinase | 1.423017 | 0.0089829 |
| K02898 RP-L26e, RPL26; large subunit ribosomal protein L26e | 1.194339 | 0.0089101 |
| K02875 RP-L14e, RPL14; large subunit ribosomal protein L14e | 1.177381 | 0.0021542 |
| K12315 ACTG2; actin, gamma-enteric smooth muscle | 1.132975 | 0.0045303 |
| K11842 USP12_46; ubiquitin carboxyl-terminal hydrolase 17137 | 1.012306 | 0.0099154 |
| K00802 SMS; spermine synthase | -1.037657 | 0.0037952 |
| K08961 K08961; chondroitin-sulfate-ABC endolyase/exolyase | -1.046008 | 0.0061217 |
| K02877 RP-L15e, RPL15; large subunit ribosomal protein L15e | -1.100619 | 0.0041806 |
| K02872 RP-L13Ae, RPL13A; large subunit ribosomal protein L13Ae | -1.226700 | 0.0076598 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | -1.363466 | 0.0006745 |
| K18041 PTP4A; protein tyrosine phosphatase type IVA | -1.384276 | 0.0079469 |
| K02261 COX2; cytochrome c oxidase subunit 2 | -1.401805 | 0.0090004 |
| K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit | -1.411037 | 0.0099857 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -1.451248 | 0.0012696 |
| K02262 COX3; cytochrome c oxidase subunit 3 | -1.486945 | 0.0005570 |
| K00595 cobL; precorrin-6Y C5,15-methyltransferase (decarboxylating) | -1.516774 | 0.0056848 |
| K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a | -1.520625 | 0.0021185 |
| K02256 COX1; cytochrome c oxidase subunit 1 | -1.525916 | 0.0004332 |
| K02891 RP-L22e, RPL22; large subunit ribosomal protein L22e | -1.569902 | 0.0029241 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | -1.591861 | 0.0007432 |
| K08659 pepDA, pepDB; dipeptidase | -1.592450 | 0.0043681 |
| K06217 phoH, phoL; phosphate starvation-inducible protein PhoH and related proteins | -1.617955 | 0.0089621 |
| K01531 mgtA, mgtB; P-type Mg2+ transporter | -1.622132 | 0.0069774 |
| K12554 murN; alanine adding enzyme | -1.624233 | 0.0094830 |
| K03767 PPIA; peptidyl-prolyl cis-trans isomerase A (cyclophilin A) | -1.629183 | 0.0028230 |
| K05832 ABC.X4.P; putative ABC transport system permease protein | -1.639911 | 0.0044330 |
| K00887 dgkA; undecaprenol kinase | -1.644084 | 0.0016521 |
| K07089 K07089; uncharacterized protein | -1.653332 | 0.0027437 |
| K01607 pcaC; 4-carboxymuconolactone decarboxylase | -1.655238 | 0.0008895 |
| K15634 gpmB; probable phosphoglycerate mutase | -1.666755 | 0.0088832 |
| K00549 metE; 5-methyltetrahydropteroyltriglutamate–homocysteine methyltransferase | -1.693720 | 0.0059683 |
| K02121 ATPVE, ntpE, atpE; V/A-type H+/Na+-transporting ATPase subunit E | -1.705143 | 0.0083363 |
| K00376 nosZ; nitrous-oxide reductase | -1.714774 | 0.0084832 |
| K10716 kch, trkA, mthK, pch; voltage-gated potassium channel | -1.732670 | 0.0090649 |
| K10440 rbsC; ribose transport system permease protein | -1.736763 | 0.0057239 |
| K01207 nagZ; beta-N-acetylhexosaminidase | -1.737236 | 0.0091096 |
| K13019 wbpI, wlbD; UDP-GlcNAc3NAcA epimerase | -1.737515 | 0.0090235 |
| K07718 yesM; two-component system, sensor histidine kinase YesM | -1.738204 | 0.0029263 |
| K11051 ABC-2.CYL.P, cylB; multidrug/hemolysin transport system permease protein | -1.740461 | 0.0090891 |
| K01226 treC; trehalose-6-phosphate hydrolase | -1.745998 | 0.0096241 |
| K03930 estA; putative tributyrin esterase | -1.755111 | 0.0087738 |
| K01635 lacD; tagatose 1,6-diphosphate aldolase | -1.763753 | 0.0022842 |
| K10439 rbsB; ribose transport system substrate-binding protein | -1.769325 | 0.0046605 |
| K19302 bcrC; undecaprenyl-diphosphatase | -1.781207 | 0.0098837 |
| K00016 LDH, ldh; L-lactate dehydrogenase | -1.795384 | 0.0035589 |
| K02030 ABC.PA.S; polar amino acid transport system substrate-binding protein | -1.795685 | 0.0096538 |
| K02500 hisF; imidazole glycerol-phosphate synthase subunit HisF | -1.809006 | 0.0045947 |
| K06122 dhbE; glycerol dehydratase small subunit | -1.811703 | 0.0010157 |
| K16345 xanP; xanthine permease XanP | -5.273228 | 0.0000070 |
| K10843 ERCC3, XPB; DNA excision repair protein ERCC-3 | -5.303558 | 0.0080761 |
| K03668 hslJ; heat shock protein HslJ | -5.366141 | 0.0015410 |
| K01713 pheC; cyclohexadienyl dehydratase | -5.421247 | 0.0000006 |
| K05563 phaF; multicomponent K+:H+ antiporter subunit F | -5.475315 | 0.0059606 |
| K03690 ubiJ; ubiquinone biosynthesis protein UbiJ | -5.479267 | 0.0002922 |
| K02062 thiQ; thiamine transport system ATP-binding protein | -5.485549 | 0.0067886 |
| K19046 casB, cse2; CRISPR system Cascade subunit CasB | -5.526899 | 0.0000001 |
| K01560 E3.8.1.2; 2-haloacid dehalogenase | -5.553855 | 0.0008261 |
| K00184 K00184; prokaryotic molybdopterin-containing oxidoreductase family, iron-sulfur binding subunit | -5.554969 | 0.0000020 |
| K15085 SLC25A42; solute carrier family 25, member 42 | -5.555635 | 0.0000546 |
| K00023 phbB; acetoacetyl-CoA reductase | -5.564308 | 0.0027237 |
| K07498 K07498; putative transposase | -5.634280 | 0.0023036 |
| K19506 PTS-Gfr-EIIA, gfrA; PTS system, fructoselysine/glucoselysine-specific IIA component | -5.752383 | 0.0003501 |
| K12990 rfbF, rhlC; rhamnosyltransferase | -5.758307 | 0.0000971 |
| K06988 fno; 8-hydroxy-5-deazaflavin:NADPH oxidoreductase | -5.771986 | 0.0000954 |
| K06996 K06996; uncharacterized protein | -5.816659 | 0.0003177 |
| K16237 aroP; aromatic amino acid permease | -5.843438 | 0.0072962 |
| K00392 sir; sulfite reductase (ferredoxin) | -5.853665 | 0.0057438 |
| K19507 PTS-Gfr-EIIB, gfrB; PTS system, fructoselysine/glucoselysine-specific IIB component | -5.866405 | 0.0000158 |
| K07775 resD; two-component system, OmpR family, response regulator ResD | -5.922751 | 0.0004437 |
| K02153 ATPeV0E, ATP6H; V-type H+-transporting ATPase subunit e | -5.944180 | 0.0081414 |
| K16651 pduX; L-threonine kinase | -5.947723 | 0.0066696 |
| K00613 GATM; glycine amidinotransferase | -5.947723 | 0.0069408 |
| K01198 xynB; xylan 1,4-beta-xylosidase | -5.968610 | 0.0025171 |
| K04032 eutT; ethanolamine utilization cobalamin adenosyltransferase | -5.989700 | 0.0000007 |
| K11003 hlyD, cyaD; hemolysin D | -6.010637 | 0.0055756 |
| K19167 abiQ; protein AbiQ | -6.140900 | 0.0000001 |
| K02052 ABC.SP.A; putative spermidine/putrescine transport system ATP-binding protein | -6.158652 | 0.0011138 |
| K01160 rusA; crossover junction endodeoxyribonuclease RusA | -6.237059 | 0.0006660 |
| K03855 fixX; ferredoxin like protein | -6.252921 | 0.0000389 |
| K17947 wbiB; dTDP-L-rhamnose 4-epimerase | -6.270065 | 0.0000005 |
| K00318 PRODH, fadM, putB; proline dehydrogenase | -6.281877 | 0.0063591 |
| K09138 K09138; uncharacterized protein | -6.313006 | 0.0054260 |
| K03577 acrR, smeT; TetR/AcrR family transcriptional regulator, acrAB operon repressor | -6.332311 | 0.0081894 |
| K19166 higB; mRNA interferase HigB | -6.353277 | 0.0000080 |
| K02275 coxB, ctaC; cytochrome c oxidase subunit II | -6.363340 | 0.0065516 |
| K00590 E2.1.1.113; site-specific DNA-methyltransferase (cytosine-N4-specific) | -6.370443 | 0.0024593 |
| K12795 SUGT1, SGT1; suppressor of G2 allele of SKP1 | -6.378740 | 0.0000002 |
| K07275 ompW; outer membrane protein | -6.395978 | 0.0000073 |
| K19180 tll; dTDP-6-deoxy-L-talose 4-dehydrogenase (NAD+) | -6.531763 | 0.0073856 |
| K01985 5SrRNA, rrf; 5S ribosomal RNA | -6.627921 | 0.0013100 |
| K03828 yjgM; putative acetyltransferase | -6.629823 | 0.0053083 |
| K07045 K07045; uncharacterized protein | -6.808462 | 0.0018988 |
| K14989 salR; two-component system, NarL family, secretion system response regulator SalR | -7.160810 | 0.0048519 |
| K03817 rimL; ribosomal-protein-serine acetyltransferase | -7.180745 | 0.0081051 |
| K01430 ureA; urease subunit gamma | -7.241866 | 0.0032330 |
| K10778 ada; AraC family transcriptional regulator, regulatory protein of adaptative response / methylated-DNA- | -7.312382 | 0.0077404 |
| K12415 comC; competence-stimulating peptide | -7.473230 | 0.0000189 |
| K02039 phoU; phosphate transport system protein | -7.543947 | 0.0066958 |
Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.
bbe = kos.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe = bbe[bbe['Buccal_Buccal_euk_p'] < 0.01]
bbe = bbe.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe = pd.concat([bbe.head(n=50), bbe.tail(n=50)])
bbe = bbe.groupby(by=bbe.index, axis=0).mean()
bbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K03232 EEF1B; elongation factor 1-beta | 1.467244 | 0.0057926 |
| K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 | 1.414129 | 0.0094551 |
| K06210 NMNAT; nicotinamide mononucleotide adenylyltransferase | 1.148057 | 0.0004174 |
| K04496 CTBP; C-terminal binding protein | 1.080461 | 0.0033637 |
| K03960 NDUFB4; NADH dehydrogenase (ubiquinone) 1 beta subcomplex subunit 4 | 1.015066 | 0.0022632 |
| K15410 EEF1D; elongation factor 1-delta | -1.280323 | 0.0041850 |
| K02993 RP-S7e, RPS7; small subunit ribosomal protein S7e | -1.284215 | 0.0031800 |
| K16494 PCDHB; protocadherin beta | -1.541813 | 0.0019962 |
| K05840 DRD5; dopamine receptor D5 | -1.566770 | 0.0000811 |
| K04079 HSP90A, htpG; molecular chaperone HtpG | -1.742817 | 0.0031099 |
| K06911 K06911; uncharacterized protein | -2.045255 | 0.0087367 |
| K02256 COX1; cytochrome c oxidase subunit 1 | -2.094969 | 0.0000953 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -2.293579 | 0.0000154 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | -2.299784 | 0.0002496 |
| K02261 COX2; cytochrome c oxidase subunit 2 | -2.316261 | 0.0000849 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | -2.426515 | 0.0001048 |
| K02262 COX3; cytochrome c oxidase subunit 3 | -2.431236 | 0.0002102 |
| K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 | -2.438527 | 0.0001136 |
| K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit | -2.444177 | 0.0000958 |
| K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a | -2.503513 | 0.0000047 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | -2.582554 | 0.0000143 |
| K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 | -2.677270 | 0.0003982 |
| K10439 rbsB; ribose transport system substrate-binding protein | -2.687404 | 0.0092403 |
| K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 | -2.748077 | 0.0051131 |
| K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L | -2.765365 | 0.0000193 |
| K00950 folK; 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine diphosphokinase | -2.802235 | 0.0021672 |
| K03559 exbD; biopolymer transport protein ExbD | -3.012558 | 0.0031365 |
| K03218 rlmB; 23S rRNA (guanosine2251-2’-O)-methyltransferase | -3.116803 | 0.0095932 |
| K03925 mraZ; MraZ protein | -3.154068 | 0.0097584 |
| K03319 TC.DASS; divalent anion:Na+ symporter, DASS family | -3.195524 | 0.0053183 |
| K18928 lldE; L-lactate dehydrogenase complex protein LldE | -3.294311 | 0.0023776 |
| K08610 ADAM21; disintegrin and metalloproteinase domain-containing protein 21 | -3.340978 | 0.0000233 |
| K16741 ALMS1; alstrom syndrome protein 1 | -3.356812 | 0.0001533 |
| K02517 lpxL, htrB; Kdo2-lipid IVA lauroyltransferase/acyltransferase | -3.404739 | 0.0054749 |
| K00782 lldG; L-lactate dehydrogenase complex protein LldG | -3.419871 | 0.0035777 |
| K06281 hyaB, hybC; hydrogenase large subunit | -3.429246 | 0.0090692 |
| K03723 mfd; transcription-repair coupling factor (superfamily II helicase) | -3.464216 | 0.0081496 |
| K00525 E1.17.4.1A, nrdA, nrdE; ribonucleoside-diphosphate reductase alpha chain | -3.500904 | 0.0060818 |
| K03596 lepA; GTP-binding protein LepA | -3.522567 | 0.0099846 |
| K05786 rarD; chloramphenicol-sensitive protein RarD | -3.534048 | 0.0062804 |
| K01207 nagZ; beta-N-acetylhexosaminidase | -3.539285 | 0.0051537 |
| K04290 S1PR3, EDG3; sphingosine 1-phosphate receptor 3 | -3.577329 | 0.0000749 |
| K01610 E4.1.1.49, pckA; phosphoenolpyruvate carboxykinase (ATP) | -3.666843 | 0.0014867 |
| K03496 parA, soj; chromosome partitioning protein | -3.755157 | 0.0065088 |
| K00831 serC, PSAT1; phosphoserine aminotransferase | -3.811166 | 0.0002465 |
| K01911 menE; O-succinylbenzoic acid—CoA ligase | -3.909487 | 0.0088855 |
| K01753 dsdA; D-serine dehydratase | -3.974443 | 0.0000856 |
| K12437 pks13; polyketide synthase 13 | -4.015661 | 0.0011796 |
| K03629 recF; DNA replication and repair protein RecF | -4.034093 | 0.0051844 |
| K16090 fiu; catecholate siderophore receptor | -4.160175 | 0.0000637 |
| K02000 proV; glycine betaine/proline transport system ATP-binding protein | -8.436459 | 0.0000000 |
| K09117 K09117; uncharacterized protein | -8.481400 | 0.0000214 |
| K03773 fklB; FKBP-type peptidyl-prolyl cis-trans isomerase FklB | -8.510498 | 0.0059909 |
| K00335 nuoF; NADH-quinone oxidoreductase subunit F | -8.514234 | 0.0012381 |
| K07776 regX3; two-component system, OmpR family, response regulator RegX3 | -8.536497 | 0.0043416 |
| K04516 AROA1, aroA; chorismate mutase | -8.558918 | 0.0000679 |
| K00012 UGDH, ugd; UDPglucose 6-dehydrogenase | -8.582749 | 0.0088989 |
| K04095 fic; cell filamentation protein | -8.586069 | 0.0000049 |
| K03071 secB; preprotein translocase subunit SecB | -8.616858 | 0.0086368 |
| K10119 msmG; raffinose/stachyose/melibiose transport system permease protein | -8.635010 | 0.0074487 |
| K02075 ABC.ZM.P; zinc/manganese transport system permease protein | -8.649968 | 0.0086134 |
| K02217 ftnA, ftn; ferritin | -8.659756 | 0.0058665 |
| K03297 emrE, qac, mmr, smr; small multidrug resistance pump | -8.674286 | 0.0008830 |
| K07481 K07481; transposase, IS5 family | -8.686355 | 0.0008838 |
| K02065 mlaF, linL, mkl; phospholipid/cholesterol/gamma-HCH transport system ATP-binding protein | -8.699583 | 0.0009880 |
| K02007 cbiM; cobalt/nickel transport system permease protein | -8.814449 | 0.0000073 |
| K09890 arfA; alternative ribosome-rescue factor | -8.835224 | 0.0063704 |
| K03797 E3.4.21.102, prc, ctpA; carboxyl-terminal processing protease | -8.835284 | 0.0037340 |
| K00348 nqrC; Na+-transporting NADH:ubiquinone oxidoreductase subunit C | -8.838146 | 0.0087200 |
| K03719 lrp; Lrp/AsnC family transcriptional regulator, leucine-responsive regulatory protein | -8.845129 | 0.0000017 |
| K02536 lpxD; UDP-3-O- | -8.856674 | 0.0000006 |
| K07084 yuiF; putative amino acid transporter | -8.864766 | 0.0029329 |
| K13256 psiE; protein PsiE | -8.872480 | 0.0050807 |
| K09765 queH; epoxyqueuosine reductase | -8.893468 | 0.0061978 |
| K00647 fabB; 3-oxoacyl- | -8.911359 | 0.0014707 |
| K01872 AARS, alaS; alanyl-tRNA synthetase | -8.936109 | 0.0013203 |
| K07571 K07571; S1 RNA binding domain protein | -8.972784 | 0.0000418 |
| K02500 hisF; imidazole glycerol-phosphate synthase subunit HisF | -8.994848 | 0.0080137 |
| K00275 pdxH, PNPO; pyridoxamine 5’-phosphate oxidase | -9.003445 | 0.0000003 |
| K00677 lpxA; UDP-N-acetylglucosamine acyltransferase | -9.009434 | 0.0000036 |
| K15580 oppA, mppA; oligopeptide transport system substrate-binding protein | -9.039172 | 0.0053359 |
| K05595 marC; multiple antibiotic resistance protein | -9.059555 | 0.0013466 |
| K08234 yaeR; glyoxylase I family protein | -9.061412 | 0.0066773 |
| K10009 tcyB, yecS; L-cystine transport system permease protein | -9.069022 | 0.0046980 |
| K09791 K09791; uncharacterized protein | -9.197084 | 0.0000262 |
| K03684 rnd; ribonuclease D | -9.310103 | 0.0034952 |
| K00024 mdh; malate dehydrogenase | -9.378307 | 0.0018311 |
| K01627 kdsA; 2-dehydro-3-deoxyphosphooctonate aldolase (KDO 8-P synthase) | -9.391367 | 0.0096899 |
| K02032 ABC.PE.A1; peptide/nickel transport system ATP-binding protein | -9.395496 | 0.0022133 |
| K07075 K07075; uncharacterized protein | -9.406457 | 0.0000642 |
| K06891 clpS; ATP-dependent Clp protease adaptor protein ClpS | -9.419182 | 0.0000077 |
| K02348 elaA; ElaA protein | -9.447487 | 0.0046015 |
| K02803 PTS-Nag-EIIB, nagE; PTS system, N-acetylglucosamine-specific IIB component | -9.454598 | 0.0016069 |
| K03569 mreB; rod shape-determining protein MreB and related proteins | -9.496033 | 0.0028899 |
| K03924 moxR; MoxR-like ATPase | -9.509025 | 0.0061251 |
| K07053 E3.1.3.97; 3’,5’-nucleoside bisphosphate phosphatase | -9.753894 | 0.0071143 |
| K09794 K09794; uncharacterized protein | -9.836549 | 0.0000028 |
| K07507 mgtC; putative Mg2+ transporter-C (MgtC) family protein | -10.261910 | 0.0055478 |
| K06925 tsaE; tRNA threonylcarbamoyladenosine biosynthesis protein TsaE | -10.825951 | 0.0032855 |
| K07166 K07166; ACT domain-containing protein | -11.318376 | 0.0081144 |
Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.
blbe = kos.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe = blbe[blbe['Blood_Saliva_euk_p'] < 0.01]
blbe = blbe.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe = pd.concat([blbe.head(n=50), blbe.tail(n=50)])
blbe = blbe.groupby(by=blbe.index, axis=0).mean()
blbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | 15.8599627 | 0.0000000 |
| K02914 RP-L34, MRPL34, rpmH; large subunit ribosomal protein L34 | 14.4043152 | 0.0000045 |
| K02078 acpP; acyl carrier protein | 13.5786127 | 0.0000004 |
| K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 | 13.5595018 | 0.0001025 |
| K02518 infA; translation initiation factor IF-1 | 13.4822104 | 0.0000008 |
| K02911 RP-L32, MRPL32, rpmF; large subunit ribosomal protein L32 | 13.4537152 | 0.0000097 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | 13.4439104 | 0.0000026 |
| K03530 hupB; DNA-binding protein HU-beta | 13.4266761 | 0.0000025 |
| K02907 RP-L30, MRPL30, rpmD; large subunit ribosomal protein L30 | 13.3759103 | 0.0000811 |
| K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 | 13.3749776 | 0.0000062 |
| K02916 RP-L35, MRPL35, rpmI; large subunit ribosomal protein L35 | 13.3685308 | 0.0000002 |
| K01990 ABC-2.A; ABC-2 type transport system ATP-binding protein | 13.3105462 | 0.0000021 |
| K03073 secE; preprotein translocase subunit SecE | 13.3041245 | 0.0000081 |
| K02909 RP-L31, rpmE; large subunit ribosomal protein L31 | 13.2838540 | 0.0000045 |
| K02965 RP-S19, rpsS; small subunit ribosomal protein S19 | 13.1295251 | 0.0000008 |
| K02961 RP-S17, MRPS17, rpsQ; small subunit ribosomal protein S17 | 13.1229086 | 0.0015489 |
| K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 | 13.1228108 | 0.0000027 |
| K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 | 13.1077884 | 0.0034733 |
| K02968 RP-S20, rpsT; small subunit ribosomal protein S20 | 13.1014287 | 0.0000357 |
| K07496 K07496; putative transposase | 13.0494535 | 0.0002718 |
| K07024 SPP; sucrose-6-phosphatase | 13.0489273 | 0.0000011 |
| K02013 ABC.FEV.A; iron complex transport system ATP-binding protein | 13.0228458 | 0.0000024 |
| K02946 RP-S10, MRPS10, rpsJ; small subunit ribosomal protein S10 | 12.9606831 | 0.0000051 |
| K02935 RP-L7, MRPL12, rplL; large subunit ribosomal protein L7/L12 | 12.9321671 | 0.0000059 |
| K02003 ABC.CD.A; putative ABC transport system ATP-binding protein | 12.8907924 | 0.0000058 |
| K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 | 12.8857555 | 0.0000060 |
| K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 | 12.8791002 | 0.0000032 |
| K02888 RP-L21, MRPL21, rplU; large subunit ribosomal protein L21 | 12.8509584 | 0.0000069 |
| K06142 hlpA, ompH; outer membrane protein | 12.8497520 | 0.0000007 |
| K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 | 12.8486413 | 0.0000027 |
| K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 | 12.8465529 | 0.0000059 |
| K02876 RP-L15, MRPL15, rplO; large subunit ribosomal protein L15 | 12.8284344 | 0.0000062 |
| K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 | 12.8000670 | 0.0000357 |
| K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 | 12.7890104 | 0.0000324 |
| K02996 RP-S9, MRPS9, rpsI; small subunit ribosomal protein S9 | 12.7888721 | 0.0000007 |
| K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 | 12.7788152 | 0.0000084 |
| K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 | 12.7729633 | 0.0028441 |
| K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 | 12.7655111 | 0.0000007 |
| K02952 RP-S13, rpsM; small subunit ribosomal protein S13 | 12.7558847 | 0.0000334 |
| K02994 RP-S8, rpsH; small subunit ribosomal protein S8 | 12.7486487 | 0.0000004 |
| K01358 clpP, CLPP; ATP-dependent Clp protease, protease subunit | 12.7150029 | 0.0000057 |
| K03496 parA, soj; chromosome partitioning protein | 12.7090303 | 0.0000130 |
| K02878 RP-L16, MRPL16, rplP; large subunit ribosomal protein L16 | 12.6748519 | 0.0000017 |
| K02890 RP-L22, MRPL22, rplV; large subunit ribosomal protein L22 | 12.6714513 | 0.0000177 |
| K02904 RP-L29, rpmC; large subunit ribosomal protein L29 | 12.6586979 | 0.0000430 |
| K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial | 12.6575291 | 0.0000035 |
| K02931 RP-L5, MRPL5, rplE; large subunit ribosomal protein L5 | 12.6275570 | 0.0000019 |
| K02015 ABC.FEV.P; iron complex transport system permease protein | 12.6193624 | 0.0000018 |
| K03088 rpoE; RNA polymerase sigma-70 factor, ECF subfamily | 12.6135742 | 0.0000013 |
| K02867 RP-L11, MRPL11, rplK; large subunit ribosomal protein L11 | 12.6094849 | 0.0000105 |
| K04074 divIVA; cell division initiation protein | 5.7454479 | 0.0004852 |
| K01541 | 5.7154325 | 0.0000648 |
| K02817 PTS-Tre-EIIA, treP; PTS system, trehalose-specific IIA component | 5.7098664 | 0.0000062 |
| K12172 RANBP2, NUP358; E3 SUMO-protein ligase RanBP2 | 5.6757778 | 0.0000068 |
| K03582 recB; exodeoxyribonuclease V beta subunit | 5.6751309 | 0.0002133 |
| K09529 DNAJC9; DnaJ homolog subfamily C member 9 | 5.6736436 | 0.0000001 |
| K04313 GPR6; G protein-coupled receptor 6 | 5.6672245 | 0.0000146 |
| K04683 TFDP1; transcription factor Dp-1 | 5.6427218 | 0.0000086 |
| K00750 GYG1, GYG2; glycogenin | 5.6013967 | 0.0000001 |
| K11842 USP12_46; ubiquitin carboxyl-terminal hydrolase 17137 | 5.5921757 | 0.0000001 |
| K00121 frmA, ADH5, adhC; S-(hydroxymethyl)glutathione dehydrogenase / alcohol dehydrogenase | 5.5885080 | 0.0000365 |
| K19083 braD, bceA; bacitracin transport system ATP-binding protein | 5.5274928 | 0.0001288 |
| K17455 FSCN2; fascin2 | 5.5021798 | 0.0000008 |
| K01209 abfA; alpha-L-arabinofuranosidase | 5.4857734 | 0.0000004 |
| K13197 IGF2BP3; insulin-like growth factor 2 mRNA-binding protein 3 | 5.4571779 | 0.0000002 |
| K01101 E3.1.3.41; 4-nitrophenyl phosphatase | 5.3807031 | 0.0000120 |
| K03095 sprL; SprT-like protein | 5.3676058 | 0.0000007 |
| K11090 LA, SSB; lupus La protein | 5.3458764 | 0.0000129 |
| K00731 C1GALT1; glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase | 5.3326491 | 0.0000413 |
| K02265 COX5B; cytochrome c oxidase subunit 5b | 5.3323070 | 0.0000053 |
| K04288 S1PR1, EDG1, CD363; sphingosine 1-phosphate receptor 1 | 5.2652232 | 0.0000008 |
| K17274 S100A10; calpactin-1 light chain | 5.2565093 | 0.0000327 |
| K17391 IGF2BP1; insulin-like growth factor 2 mRNA-binding protein 1 | 5.2516136 | 0.0001783 |
| K06053 RBPSUH, RBPJK; recombining binding protein suppressor of hairless | 5.1890617 | 0.0000017 |
| K12755 MYL9; myosin regulatory light chain 9 | 5.0889277 | 0.0000783 |
| K04875 KCNA2, KV1.2; potassium voltage-gated channel Shaker-related subfamily A member 2 | 5.0600541 | 0.0000005 |
| K11904 vgrG; type VI secretion system secreted protein VgrG | 5.0367433 | 0.0035040 |
| K07986 ULBP; UL16 binding protein | 4.9550450 | 0.0003358 |
| K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 | 4.9425193 | 0.0000015 |
| K08643 zmpB; zinc metalloprotease ZmpB | 4.9348739 | 0.0011437 |
| K19611 fepA, pfeA, iroN, pirA; ferric enterobactin receptor | 4.7953583 | 0.0000419 |
| K17624 engCP, engBF, endoEF; endo-alpha-N-acetylgalactosaminidase | 4.6651482 | 0.0006807 |
| K06727 FCRL, IRTA, CD307; Fc receptor-like protein | 4.6457501 | 0.0000708 |
| K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 | 4.6189248 | 0.0000001 |
| K14682 argAB; amino-acid N-acetyltransferase | 4.5573539 | 0.0014321 |
| K04876 KCNA3, KV1.3; potassium voltage-gated channel Shaker-related subfamily A member 3 | 4.5167626 | 0.0000083 |
| K04292 S1PR2, EDG5; sphingosine 1-phosphate receptor 2 | 4.2912577 | 0.0002126 |
| K02001 proW; glycine betaine/proline transport system permease protein | 4.1421000 | 0.0005923 |
| K01390 iga; IgA-specific metalloendopeptidase | 4.0510644 | 0.0036671 |
| K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase | 3.9642023 | 0.0009294 |
| K01940 argG, ASS1; argininosuccinate synthase | 2.7965061 | 0.0076286 |
| K03767 PPIA; peptidyl-prolyl cis-trans isomerase A (cyclophilin A) | 1.4732279 | 0.0039824 |
| K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 | -0.8221195 | 0.0030452 |
| K02261 COX2; cytochrome c oxidase subunit 2 | -1.3314744 | 0.0099234 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | -1.3379217 | 0.0020646 |
| K02262 COX3; cytochrome c oxidase subunit 3 | -1.4490693 | 0.0005670 |
| K02256 COX1; cytochrome c oxidase subunit 1 | -1.5164495 | 0.0005461 |
| K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 | -1.6760517 | 0.0034799 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -1.8265865 | 0.0027652 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | -1.8764987 | 0.0010947 |
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
blse = kos.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse = blse[blse['Blood_Buccal_euk_p'] < 0.01]
blse = blse.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse = pd.concat([blse.head(n=50), blse.tail(n=50)])
blse = blse.groupby(by=blse.index, axis=0).mean()
blse.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | 16.1818093 | 0.0002471 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | 11.5128420 | 0.0000618 |
| K03574 mutT, NUDT15, MTH2; 8-oxo-dGTP diphosphatase | 10.9861718 | 0.0001413 |
| K02904 RP-L29, rpmC; large subunit ribosomal protein L29 | 10.8776580 | 0.0000007 |
| K01756 purB, ADSL; adenylosuccinate lyase | 10.8536183 | 0.0005353 |
| K07024 SPP; sucrose-6-phosphatase | 10.7967873 | 0.0005105 |
| K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 | 10.6293239 | 0.0000120 |
| K02518 infA; translation initiation factor IF-1 | 10.3714855 | 0.0000136 |
| K02078 acpP; acyl carrier protein | 10.3023185 | 0.0000021 |
| K01104 E3.1.3.48; protein-tyrosine phosphatase | 10.0810563 | 0.0000283 |
| K03075 secG; preprotein translocase subunit SecG | 10.0686858 | 0.0000066 |
| K00948 PRPS, prsA; ribose-phosphate pyrophosphokinase | 10.0608984 | 0.0000517 |
| K08998 K08998; uncharacterized protein | 10.0573727 | 0.0001643 |
| K03499 trkA, ktrA; trk system potassium uptake protein | 10.0058758 | 0.0000700 |
| K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 | 9.9877413 | 0.0001515 |
| K16787 ecfA2; energy-coupling factor transport system ATP-binding protein | 9.9691833 | 0.0001840 |
| K02372 fabZ; 3-hydroxyacyl- | 9.9637621 | 0.0000294 |
| K00939 adk, AK; adenylate kinase | 9.9595009 | 0.0078494 |
| K02871 RP-L13, MRPL13, rplM; large subunit ribosomal protein L13 | 9.8753227 | 0.0000010 |
| K02015 ABC.FEV.P; iron complex transport system permease protein | 9.8667187 | 0.0002448 |
| K02440 GLPF; glycerol uptake facilitator protein | 9.8408240 | 0.0001011 |
| K04564 SOD2; superoxide dismutase, Fe-Mn family | 9.7810834 | 0.0000055 |
| K11991 tadA; tRNA(adenine34) deaminase | 9.7639518 | 0.0000219 |
| K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 | 9.7624176 | 0.0000000 |
| K06199 crcB, FEX; fluoride exporter | 9.7233258 | 0.0000191 |
| K00928 lysC; aspartate kinase | 9.7065719 | 0.0003610 |
| K02886 RP-L2, MRPL2, rplB; large subunit ribosomal protein L2 | 9.6906968 | 0.0000908 |
| K03073 secE; preprotein translocase subunit SecE | 9.6729322 | 0.0003735 |
| K01738 cysK; cysteine synthase | 9.6561421 | 0.0000015 |
| K00790 murA; UDP-N-acetylglucosamine 1-carboxyvinyltransferase | 9.6408742 | 0.0002500 |
| K09710 ybeB; ribosome-associated protein | 9.6404415 | 0.0000461 |
| K01246 tag; DNA-3-methyladenine glycosylase I | 9.6303301 | 0.0000045 |
| K04077 groEL, HSPD1; chaperonin GroEL | 9.5891273 | 0.0047969 |
| K01915 glnA, GLUL; glutamine synthetase | 9.5824446 | 0.0000279 |
| K01933 purM; phosphoribosylformylglycinamidine cyclo-ligase | 9.5736189 | 0.0002323 |
| K03687 GRPE; molecular chaperone GrpE | 9.5729556 | 0.0000106 |
| K00615 E2.2.1.1, tktA, tktB; transketolase | 9.5505026 | 0.0001070 |
| K02433 gatA, QRSL1; aspartyl-tRNA(Asn)/glutamyl-tRNA(Gln) amidotransferase subunit A | 9.5442794 | 0.0008482 |
| K01923 purC; phosphoribosylaminoimidazole-succinocarboxamide synthase | 9.5357375 | 0.0000074 |
| K02338 dnaN; DNA polymerase III subunit beta | 9.5332006 | 0.0001710 |
| K02992 RP-S7, MRPS7, rpsG; small subunit ribosomal protein S7 | 9.5083188 | 0.0002118 |
| K00820 glmS, GFPT; glutamine—fructose-6-phosphate transaminase (isomerizing) | 9.5003694 | 0.0001099 |
| K02996 RP-S9, MRPS9, rpsI; small subunit ribosomal protein S9 | 9.4773279 | 0.0000790 |
| K03671 trxA; thioredoxin 1 | 9.4758376 | 0.0000215 |
| K01687 ilvD; dihydroxy-acid dehydratase | 9.4662150 | 0.0003556 |
| K03708 ctsR; transcriptional regulator of stress and heat shock response | 9.4498962 | 0.0000162 |
| K02314 dnaB; replicative DNA helicase | 9.4429692 | 0.0002617 |
| K01963 accD; acetyl-CoA carboxylase carboxyl transferase subunit beta | 9.4244850 | 0.0001347 |
| K07271 licD; lipopolysaccharide cholinephosphotransferase | 9.4218430 | 0.0002400 |
| K02837 prfC; peptide chain release factor 3 | 9.3918449 | 0.0002680 |
| K03284 corA; magnesium transporter | 7.1522003 | 0.0001015 |
| K01661 menB; naphthoate synthase | 7.1493059 | 0.0000647 |
| K01647 CS, gltA; citrate synthase | 7.1195886 | 0.0000067 |
| K18928 lldE; L-lactate dehydrogenase complex protein LldE | 7.0983073 | 0.0000061 |
| K12757 MYL12; myosin regulatory light chain 12 | 7.0955579 | 0.0000007 |
| K05840 DRD5; dopamine receptor D5 | 7.0636557 | 0.0000000 |
| K08857 NEK1_4_5; NIMA (never in mitosis gene a)-related kinase 38443 | 7.0589315 | 0.0000035 |
| K03466 ftsK, spoIIIE; DNA segregation ATPase FtsK/SpoIIIE, S-DNA-T family | 7.0565895 | 0.0008980 |
| K03960 NDUFB4; NADH dehydrogenase (ubiquinone) 1 beta subcomplex subunit 4 | 7.0253055 | 0.0000000 |
| K03639 moaA, CNX2; GTP 3’,8-cyclase | 7.0054202 | 0.0000799 |
| K00782 lldG; L-lactate dehydrogenase complex protein LldG | 7.0019641 | 0.0000004 |
| K14455 GOT2; aspartate aminotransferase, mitochondrial | 6.9945243 | 0.0000001 |
| K03629 recF; DNA replication and repair protein RecF | 6.9883502 | 0.0000612 |
| K00653 CDY; chromodomain protein Y | 6.9102327 | 0.0000007 |
| K05786 rarD; chloramphenicol-sensitive protein RarD | 6.9046115 | 0.0000255 |
| K00371 narH, narY, nxrB; nitrate reductase / nitrite oxidoreductase, beta subunit | 6.8665611 | 0.0001685 |
| K03547 sbcD, mre11; DNA repair protein SbcD/Mre11 | 6.8586688 | 0.0000767 |
| K02342 dnaQ; DNA polymerase III subunit epsilon | 6.7129347 | 0.0000956 |
| K00525 E1.17.4.1A, nrdA, nrdE; ribonucleoside-diphosphate reductase alpha chain | 6.7013656 | 0.0000661 |
| K17455 FSCN2; fascin2 | 6.6408974 | 0.0000088 |
| K00240 sdhB, frdB; succinate dehydrogenase / fumarate reductase, iron-sulfur subunit | 6.6270061 | 0.0013377 |
| K10343 SPSB1_4, SSB1, SSB4; SPRY domain-containing SOCS box protein 43922 | 6.5750024 | 0.0000001 |
| K03723 mfd; transcription-repair coupling factor (superfamily II helicase) | 6.5721780 | 0.0000566 |
| K01778 dapF; diaminopimelate epimerase | 6.5471812 | 0.0000360 |
| K01207 nagZ; beta-N-acetylhexosaminidase | 6.4067020 | 0.0000378 |
| K01652 E2.2.1.6L, ilvB, ilvG, ilvI; acetolactate synthase I/II/III large subunit | 6.3920399 | 0.0007322 |
| K06911 K06911; uncharacterized protein | 6.3133922 | 0.0000030 |
| K02858 ribB, RIB3; 3,4-dihydroxy 2-butanone 4-phosphate synthase | 6.3106401 | 0.0003793 |
| K03527 ispH, lytB; 4-hydroxy-3-methylbut-2-en-1-yl diphosphate reductase | 6.1477167 | 0.0002274 |
| K01911 menE; O-succinylbenzoic acid—CoA ligase | 6.1373608 | 0.0000285 |
| K01744 aspA; aspartate ammonia-lyase | 6.0818258 | 0.0002584 |
| K00099 dxr; 1-deoxy-D-xylulose-5-phosphate reductoisomerase | 5.9628220 | 0.0004205 |
| K04875 KCNA2, KV1.2; potassium voltage-gated channel Shaker-related subfamily A member 2 | 5.9414027 | 0.0000049 |
| K01610 E4.1.1.49, pckA; phosphoenolpyruvate carboxykinase (ATP) | 5.9186660 | 0.0000373 |
| K12755 MYL9; myosin regulatory light chain 9 | 5.8986788 | 0.0000010 |
| K03319 TC.DASS; divalent anion:Na+ symporter, DASS family | 5.8744409 | 0.0000259 |
| K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 | 5.7928033 | 0.0000036 |
| K02517 lpxL, htrB; Kdo2-lipid IVA lauroyltransferase/acyltransferase | 5.7843811 | 0.0001123 |
| K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 | 5.7651144 | 0.0000040 |
| K00831 serC, PSAT1; phosphoserine aminotransferase | 5.7394312 | 0.0000028 |
| K02930 RP-L4e, RPL4; large subunit ribosomal protein L4e | 5.6662498 | 0.0000003 |
| K08301 rng, cafA; ribonuclease G | 5.6512400 | 0.0000924 |
| K06281 hyaB, hybC; hydrogenase large subunit | 5.4295802 | 0.0001948 |
| K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 | 5.4132320 | 0.0000080 |
| K03546 sbcC, rad50; DNA repair protein SbcC/Rad50 | 5.2338079 | 0.0017231 |
| K16092 btuB; vitamin B12 transporter | 4.7102953 | 0.0017984 |
| K16495 PCDHGA; protocadherin gamma subfamily A | 4.5153796 | 0.0000049 |
| K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase | 1.7257634 | 0.0057157 |
| K06478 PTPRC, CD45; receptor-type tyrosine-protein phosphatase C | -0.3261990 | 0.0020246 |
| K00522 FTH1; ferritin heavy chain | -0.9651333 | 0.0070830 |
As for HUMAnN2 I am now looking at the KEGG orthologs assigned to the Uniref50 database.
kos = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/genes_ko_50.csv', header=0, index_col=0)
kegg_list = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kegg_list.csv', header=0, index_col=0)
kegg_list.drop_duplicates(inplace=True)
kegg_dict = {}
kl = list(kegg_list.index.values)
for ko in kl:
kegg_dict[ko] = ko+' '+kegg_list.loc[ko, 'Product']
kos = kos.rename(index=kegg_dict)
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
bs3 = kos.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs3 = bs3[bs3['Saliva_Blood_p'] < 0.01]
bs3 = bs3.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs3 = pd.concat([bs3.head(n=50), bs3.tail(n=50)])
bs3 = bs3.groupby(by=bs3.index, axis=0).mean()
bs3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K02987 RP-S4e, RPS4; small subunit ribosomal protein S4e | 2.900731 | 0.0015733 |
| K05692 ACTB_G1; actin beta/gamma 1 | 2.742918 | 0.0005082 |
| K07604 KRT1; type I keratin, acidic | 1.975235 | 0.0047374 |
| K02984 RP-S3Ae, RPS3A; small subunit ribosomal protein S3Ae | 1.928509 | 0.0087034 |
| K19469 FTO; mRNA N6-methyladenine demethylase | 1.602431 | 0.0069474 |
| K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 | 1.395337 | 0.0050401 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | 1.334215 | 0.0001341 |
| K02262 COX3; cytochrome c oxidase subunit 3 | 1.146687 | 0.0020483 |
| K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 | 1.097022 | 0.0068121 |
| K02256 COX1; cytochrome c oxidase subunit 1 | -1.054638 | 0.0038354 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -1.271552 | 0.0002763 |
| K09206 KLF5; krueppel-like factor 5 | -2.551036 | 0.0013898 |
| K03097 CSNK2A; casein kinase II subunit alpha | -2.640297 | 0.0002630 |
| K07208 RHEB; Ras homolog enriched in brain | -2.663001 | 0.0003144 |
| K09627 PRSS23; serine protease 23 | -2.935477 | 0.0035377 |
| K10571 DET1; de-etiolated-1 | -2.937331 | 0.0036227 |
| K05001 KCNJ8, KIR6.1; potassium inwardly-rectifying channel subfamily J member 8 | -3.017643 | 0.0019871 |
| K08428 GPR139; G protein-coupled receptor 139 | -3.127061 | 0.0008065 |
| K05421 BSF3, CLC; B-cell stimulating factor 3 | -3.152678 | 0.0004729 |
| K04902 KCNG3, KV6.3; potassium voltage-gated channel subfamily G member 3 | -3.156789 | 0.0025693 |
| K17658 TEFM; transcription elongation factor, mitochondrial | -3.176972 | 0.0002493 |
| K11182 AOC1, ABP1; diamine oxidase | -3.187546 | 0.0013948 |
| K06174 ABCE1, Rli1; ATP-binding cassette, sub-family E, member 1 | -3.256348 | 0.0000379 |
| K05002 KCNJ9, KIR3.3; potassium inwardly-rectifying channel subfamily J member 9 | -3.257962 | 0.0060961 |
| K04506 SIAH1; E3 ubiquitin-protein ligase SIAH1 | -3.295996 | 0.0021080 |
| K04136 ADRA1B; adrenergic receptor alpha-1B | -3.312017 | 0.0000004 |
| K06625 CDKN1A, P21, CIP1; cyclin-dependent kinase inhibitor 1A | -3.346373 | 0.0000829 |
| K10504 ZBTB25; zinc finger and BTB domain-containing protein 25 | -3.350434 | 0.0005058 |
| K01228 MOGS; mannosyl-oligosaccharide glucosidase | -3.364307 | 0.0009338 |
| K15286 SLC35E4; solute carrier family 35, member E4 | -3.391393 | 0.0000965 |
| K17092 ANXA2; annexin A2 | -3.396008 | 0.0008336 |
| K08859 STK35, PDIK1, CLIK1; serine/threonine kinase 35 | -3.402229 | 0.0003786 |
| K11310 GTF3C4, KAT12; general transcription factor 3C polypeptide 4 | -3.424249 | 0.0013772 |
| K10269 FBXL3; F-box and leucine-rich repeat protein 3 | -3.439181 | 0.0020834 |
| K06262 GP1BB, CD42c; platelet glycoprotein Ib beta chain | -3.447902 | 0.0003672 |
| K10458 KLHL21; kelch-like protein 21 | -3.501530 | 0.0003003 |
| K00889 PIP5K; 1-phosphatidylinositol-4-phosphate 5-kinase | -3.504482 | 0.0020400 |
| K06278 CAV1; caveolin 1 | -3.526911 | 0.0002417 |
| K07861 RAC3; Ras-related C3 botulinum toxin substrate 3 | -3.529358 | 0.0020969 |
| K13213 MATR3; matrin 3 | -3.539074 | 0.0056097 |
| K20212 PROX2; prospero homeobox 2 | -3.542098 | 0.0002030 |
| K13113 UBL5, HUB1; ubiquitin-like protein 5 | -3.551432 | 0.0016761 |
| K02949 RP-S11e, RPS11; small subunit ribosomal protein S11e | -3.580150 | 0.0017033 |
| K22236 EXPH5; exophilin-5 | -3.592531 | 0.0001144 |
| K07941 ARF6; ADP-ribosylation factor 6 | -3.594768 | 0.0009074 |
| K15688 MUL1; E3 ubiquitin-protein ligase MUL1 | -3.596998 | 0.0015710 |
| K04662 BMP4; bone morphogenetic protein 4 | -3.615678 | 0.0005611 |
| K10536 aguA; agmatine deiminase | -3.618663 | 0.0005001 |
| K16198 YWHAG_H; 37694 protein gamma/eta | -3.621081 | 0.0012849 |
| K04300 SREB3, GPR173; super conserved receptor expressed in brain 3 | -3.622873 | 0.0033145 |
| K02528 ksgA; 16S rRNA (adenine1518-N6/adenine1519-N6)-dimethyltransferase | -10.634240 | 0.0000001 |
| K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 | -10.635716 | 0.0000000 |
| K02114 ATPF1E, atpC; F-type H+-transporting ATPase subunit epsilon | -10.637286 | 0.0000000 |
| K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 | -10.649774 | 0.0000000 |
| K03040 rpoA; DNA-directed RNA polymerase subunit alpha | -10.658494 | 0.0000000 |
| K03977 engA, der; GTPase | -10.668886 | 0.0000000 |
| K01887 RARS, argS; arginyl-tRNA synthetase | -10.669018 | 0.0000000 |
| K02337 dnaE; DNA polymerase III subunit alpha | -10.669362 | 0.0000000 |
| K01495 GCH1, folE; GTP cyclohydrolase IA | -10.694049 | 0.0000003 |
| K02967 RP-S2, MRPS2, rpsB; small subunit ribosomal protein S2 | -10.712133 | 0.0000000 |
| K03798 ftsH, hflB; cell division protease FtsH | -10.736973 | 0.0000000 |
| K02650 pilA; type IV pilus assembly protein PilA | -10.744343 | 0.0000002 |
| K07284 srtA; sortase A | -10.747371 | 0.0000003 |
| K12952 ctpE; cation-transporting P-type ATPase E | -10.754468 | 0.0000000 |
| K03687 GRPE; molecular chaperone GrpE | -10.762873 | 0.0000000 |
| K03470 rnhB; ribonuclease HII | -10.766713 | 0.0000000 |
| K21572 susD; starch-binding outer membrane protein, SusD/RagB family | -10.770900 | 0.0000068 |
| K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 | -10.789977 | 0.0000000 |
| K02016 ABC.FEV.S; iron complex transport system substrate-binding protein | -10.802965 | 0.0000001 |
| K02945 RP-S1, rpsA; small subunit ribosomal protein S1 | -10.813169 | 0.0000000 |
| K02015 ABC.FEV.P; iron complex transport system permease protein | -10.831730 | 0.0000001 |
| K01876 aspS; aspartyl-tRNA synthetase | -10.861371 | 0.0000000 |
| K03980 murJ, mviN; putative peptidoglycan lipid II flippase | -10.864994 | 0.0000000 |
| K01662 dxs; 1-deoxy-D-xylulose-5-phosphate synthase | -10.870003 | 0.0000000 |
| K00604 MTFMT, fmt; methionyl-tRNA formyltransferase | -10.873510 | 0.0000000 |
| K03601 xseA; exodeoxyribonuclease VII large subunit | -10.874460 | 0.0000000 |
| K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 | -10.890262 | 0.0000000 |
| K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA | -10.911499 | 0.0000000 |
| K07652 vicK; two-component system, OmpR family, sensor histidine kinase VicK | -10.929455 | 0.0000001 |
| K07056 rsmI; 16S rRNA (cytidine1402-2’-O)-methyltransferase | -11.009937 | 0.0000000 |
| K02108 ATPF0A, atpB; F-type H+-transporting ATPase subunit a | -11.016583 | 0.0000000 |
| K02004 ABC.CD.P; putative ABC transport system permease protein | -11.036046 | 0.0000000 |
| K01929 murF; UDP-N-acetylmuramoyl-tripeptide–D-alanyl-D-alanine ligase | -11.069960 | 0.0000002 |
| K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial | -11.093019 | 0.0000002 |
| K02909 RP-L31, rpmE; large subunit ribosomal protein L31 | -11.093321 | 0.0000000 |
| K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 | -11.096519 | 0.0077198 |
| K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 | -11.136386 | 0.0000000 |
| K03711 fur, zur, furB; Fur family transcriptional regulator, ferric uptake regulator | -11.137503 | 0.0000000 |
| K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 | -11.145289 | 0.0000000 |
| K02520 infC, MTIF3; translation initiation factor IF-3 | -11.157493 | 0.0000000 |
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | -11.209100 | 0.0000000 |
| K03629 recF; DNA replication and repair protein RecF | -11.282828 | 0.0000000 |
| K03284 corA; magnesium transporter | -11.300507 | 0.0000002 |
| K01928 murE; UDP-N-acetylmuramoyl-L-alanyl-D-glutamate–2,6-diaminopimelate ligase | -11.356828 | 0.0000000 |
| K02518 infA; translation initiation factor IF-1 | -11.421470 | 0.0000000 |
| K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 | -11.429634 | 0.0000001 |
| K02035 ABC.PE.S; peptide/nickel transport system substrate-binding protein | -11.584982 | 0.0000000 |
| K02428 rdgB; XTP/dITP diphosphohydrolase | -11.977146 | 0.0000000 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | -12.183237 | 0.0000000 |
| UNGROUPED | -101.487653 | 0.0001436 |
Negative fold changes are higher in buccal samples while positive are higher in blood samples.
bb3 = kos.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb3 = bb3[bb3['Buccal_Blood_p'] < 0.01]
bb3 = bb3.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb3 = pd.concat([bb3.head(n=50), bb3.tail(n=50)])
bb3 = bb3.groupby(by=bb3.index, axis=0).mean()
bb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K05692 ACTB_G1; actin beta/gamma 1 | 1.4993031 | 0.0003430 |
| K02262 COX3; cytochrome c oxidase subunit 3 | -0.6901933 | 0.0065397 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | -0.7995222 | 0.0071439 |
| K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 | -0.9286128 | 0.0067070 |
| K02256 COX1; cytochrome c oxidase subunit 1 | -1.0054815 | 0.0015965 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -1.3406126 | 0.0000704 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | -1.7012226 | 0.0000637 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | -1.7732693 | 0.0000640 |
| K02261 COX2; cytochrome c oxidase subunit 2 | -1.8317668 | 0.0000277 |
| K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L | -1.9266330 | 0.0002886 |
| K01197 hya; hyaluronoglucosaminidase | -5.5547745 | 0.0000297 |
| K04186 ACKR4, CCRL1, CCR11; atypical chemokine receptor 4 | -5.5603638 | 0.0000562 |
| K08051 PLEKHA8, FAPP2; pleckstrin homology domain containing family A member 8 | -5.7567048 | 0.0000020 |
| K02975 RP-S25e, RPS25; small subunit ribosomal protein S25e | -6.0071094 | 0.0000464 |
| K00716 FUT3; galactoside 3(4)-fucosyltransferase 3 | -6.0412983 | 0.0000008 |
| K03605 hyaD, hybD; hydrogenase maturation protease | -6.0741342 | 0.0001277 |
| K22063 ISCA1; iron-sulfur cluster assembly 1 | -6.5689145 | 0.0000128 |
| K04181 CCR6, CD196; C-C chemokine receptor type 6 | -6.5690809 | 0.0000001 |
| K00161 PDHA, pdhA; pyruvate dehydrogenase E1 component alpha subunit | -6.5963219 | 0.0000039 |
| K00974 cca; tRNA nucleotidyltransferase (CCA-adding enzyme) | -6.6135533 | 0.0000080 |
| K12267 msrAB; peptide methionine sulfoxide reductase msrA/msrB | -6.6908854 | 0.0000086 |
| K05592 deaD, cshA; ATP-dependent RNA helicase DeaD | -6.7206069 | 0.0001525 |
| K03402 argR, ahrC; transcriptional regulator of arginine metabolism | -6.7282698 | 0.0000033 |
| K19468 KCNJ18; potassium inwardly-rectifying channel subfamily J member 18 | -6.7950723 | 0.0000096 |
| K05787 hupA; DNA-binding protein HU-alpha | -6.8009142 | 0.0000039 |
| K09067 ASCL; achaete-scute complex protein | -6.8498691 | 0.0000022 |
| K02271 COX7B; cytochrome c oxidase subunit 7b | -6.8592266 | 0.0000063 |
| K02873 RP-L13e, RPL13; large subunit ribosomal protein L13e | -6.9679175 | 0.0000089 |
| K03571 mreD; rod shape-determining protein MreD | -6.9707589 | 0.0000810 |
| K03062 PSMC1, RPT2; 26S proteasome regulatory subunit T2 | -6.9765306 | 0.0000004 |
| K16362 FLRT; leucine-rich repeat transmembrane protein FLRT | -6.9876251 | 0.0000000 |
| K15705 RNF152; E3 ubiquitin-protein ligase RNF152 | -6.9964333 | 0.0000004 |
| K04206 NPY4R; neuropeptide Y receptor type 4 | -7.0025676 | 0.0000001 |
| K09014 sufB; Fe-S cluster assembly protein SufB | -7.0457215 | 0.0000014 |
| K03531 ftsZ; cell division protein FtsZ | -7.0472135 | 0.0000029 |
| K00549 metE; 5-methyltetrahydropteroyltriglutamate–homocysteine methyltransferase | -7.0575570 | 0.0001285 |
| K02012 afuA, fbpA; iron(III) transport system substrate-binding protein | -7.0665801 | 0.0000956 |
| K04047 dps; starvation-inducible DNA-binding protein | -7.0994269 | 0.0000053 |
| K00865 glxK, garK; glycerate 2-kinase | -7.0996546 | 0.0000000 |
| K15583 oppD; oligopeptide transport system ATP-binding protein | -7.1030263 | 0.0000901 |
| K00759 APRT, apt; adenine phosphoribosyltransferase | -7.1375675 | 0.0000001 |
| K15051 endA; DNA-entry nuclease | -7.1395383 | 0.0000011 |
| K02882 RP-L18Ae, RPL18A; large subunit ribosomal protein L18Ae | -7.1535295 | 0.0000398 |
| K09817 znuC; zinc transport system ATP-binding protein | -7.1585516 | 0.0000625 |
| K01744 aspA; aspartate ammonia-lyase | -7.1621156 | 0.0000000 |
| K00362 nirB; nitrite reductase (NADH) large subunit | -7.1815300 | 0.0000013 |
| K09501 SERPINH1, HSP47; serpin peptidase inhibitor, clade H, member 1 | -7.2541492 | 0.0000014 |
| K01870 IARS, ileS; isoleucyl-tRNA synthetase | -7.3234073 | 0.0000020 |
| K01462 PDF, def; peptide deformylase | -7.3667162 | 0.0000013 |
| K03499 trkA, ktrA; trk system potassium uptake protein | -7.4149298 | 0.0000113 |
| K03687 GRPE; molecular chaperone GrpE | -8.5932077 | 0.0000636 |
| K00728 POMT, pmt; dolichyl-phosphate-mannose-protein mannosyltransferase | -8.6341350 | 0.0000113 |
| K12019 TRIM43S; tripartite motif-containing protein 43/48/49/64/77 | -8.6406359 | 0.0000002 |
| K01897 ACSL, fadD; long-chain acyl-CoA synthetase | -8.6451236 | 0.0000000 |
| K01673 cynT, can; carbonic anhydrase | -8.6489442 | 0.0000010 |
| K06133 LYS5, acpT; 4’-phosphopantetheinyl transferase | -8.6506859 | 0.0000086 |
| K01256 pepN; aminopeptidase N | -8.6534216 | 0.0000075 |
| K03801 lipB; lipoyl(octanoyl) transferase | -8.6579620 | 0.0000085 |
| K08680 menH; 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase | -8.7457409 | 0.0000029 |
| K03601 xseA; exodeoxyribonuclease VII large subunit | -8.7460410 | 0.0000178 |
| K02909 RP-L31, rpmE; large subunit ribosomal protein L31 | -8.7782578 | 0.0000050 |
| K03629 recF; DNA replication and repair protein RecF | -8.7908081 | 0.0000020 |
| K02358 tuf, TUFM; elongation factor Tu | -8.7911934 | 0.0001599 |
| K00919 ispE; 4-diphosphocytidyl-2-C-methyl-D-erythritol kinase | -8.8275283 | 0.0000001 |
| K03308 TC.NSS; neurotransmitter:Na+ symporter, NSS family | -8.8362899 | 0.0000020 |
| K02777 PTS-Glc-EIIA, crr; PTS system, sugar-specific IIA component | -8.8622937 | 0.0000004 |
| K00240 sdhB, frdB; succinate dehydrogenase / fumarate reductase, iron-sulfur subunit | -8.8736866 | 0.0000005 |
| K01752 E4.3.1.17, sdaA, sdaB, tdcG; L-serine dehydratase | -8.8849557 | 0.0000001 |
| K07442 TRM61, GCD14; tRNA (adenine57-N1/adenine58-N1)-methyltransferase catalytic subunit | -8.9166101 | 0.0000001 |
| K12952 ctpE; cation-transporting P-type ATPase E | -8.9502777 | 0.0000001 |
| K01887 RARS, argS; arginyl-tRNA synthetase | -8.9638112 | 0.0000031 |
| K03561 exbB; biopolymer transport protein ExbB | -8.9871294 | 0.0000039 |
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | -9.0621394 | 0.0000014 |
| K00948 PRPS, prsA; ribose-phosphate pyrophosphokinase | -9.1014159 | 0.0000000 |
| K02016 ABC.FEV.S; iron complex transport system substrate-binding protein | -9.1059215 | 0.0000007 |
| K04096 smf; DNA processing protein | -9.1325659 | 0.0000008 |
| K01495 GCH1, folE; GTP cyclohydrolase IA | -9.1419679 | 0.0000000 |
| K01790 rfbC, rmlC; dTDP-4-dehydrorhamnose 3,5-epimerase | -9.1423791 | 0.0000003 |
| K00067 rfbD, rmlD; dTDP-4-dehydrorhamnose reductase | -9.1423791 | 0.0000003 |
| K01923 purC; phosphoribosylaminoimidazole-succinocarboxamide synthase | -9.1888857 | 0.0000031 |
| K08998 K08998; uncharacterized protein | -9.1971222 | 0.0000040 |
| K00036 G6PD, zwf; glucose-6-phosphate 1-dehydrogenase | -9.2062001 | 0.0000012 |
| K02470 gyrB; DNA gyrase subunit B | -9.2629947 | 0.0000002 |
| K02335 polA; DNA polymerase I | -9.2744374 | 0.0000085 |
| K03310 TC.AGCS; alanine or glycine:cation symporter, AGCS family | -9.2785495 | 0.0000040 |
| K03536 rnpA; ribonuclease P protein component | -9.2975345 | 0.0000043 |
| K01662 dxs; 1-deoxy-D-xylulose-5-phosphate synthase | -9.3335747 | 0.0000043 |
| K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 | -9.3704015 | 0.0000001 |
| K03559 exbD; biopolymer transport protein ExbD | -9.4022669 | 0.0000000 |
| K00059 fabG, OAR1; 3-oxoacyl- | -9.4756747 | 0.0000004 |
| K01754 E4.3.1.19, ilvA, tdcB; threonine dehydratase | -9.5346320 | 0.0000000 |
| K00088 IMPDH, guaB; IMP dehydrogenase | -9.6008352 | 0.0000000 |
| K14347 SLC10A7, P7; solute carrier family 10 (sodium/bile acid cotransporter), member 7 | -9.6046050 | 0.0000106 |
| K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA | -9.6845660 | 0.0000000 |
| K02518 infA; translation initiation factor IF-1 | -9.7149416 | 0.0000003 |
| K05985 rnmV; ribonuclease M5 | -9.7184461 | 0.0000195 |
| K02015 ABC.FEV.P; iron complex transport system permease protein | -9.9488969 | 0.0000096 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | -10.2947787 | 0.0000014 |
| K06199 crcB, FEX; fluoride exporter | -10.3477796 | 0.0001305 |
| UNGROUPED | -279.5860424 | 0.0009433 |
Negative fold changes are higher in saliva samples while positive are higher in buccal samples.
sb3 = kos.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb3 = sb3[sb3['Saliva_Buccal_p'] < 0.01]
sb3 = sb3.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb3 = pd.concat([sb3.head(n=50), sb3.tail(n=50)])
sb3 = sb3.groupby(by=sb3.index, axis=0).mean()
sb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K22463 afsA; 2-oxo-3-(phosphooxy)propyl 3-oxoalkanoate synthase | 2.850946 | 0.0058151 |
| K00865 glxK, garK; glycerate 2-kinase | 2.820485 | 0.0067396 |
| K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 | 2.399969 | 0.0051015 |
| K02261 COX2; cytochrome c oxidase subunit 2 | 2.202837 | 0.0000476 |
| K10354 ACTA1; actin, alpha skeletal muscle | 2.193567 | 0.0065157 |
| K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 | 2.133737 | 0.0000685 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | 2.127053 | 0.0000110 |
| K02125 ATPeF08, MTATP8, ATP8; F-type H+-transporting ATPase subunit 8 | 2.106681 | 0.0000919 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | 2.071658 | 0.0000274 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | 2.066092 | 0.0000330 |
| K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 | 2.047173 | 0.0000078 |
| K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 | 2.025634 | 0.0000472 |
| K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L | 2.005996 | 0.0001796 |
| K01673 cynT, can; carbonic anhydrase | 1.985742 | 0.0096769 |
| K02256 COX1; cytochrome c oxidase subunit 1 | 1.953674 | 0.0003900 |
| K02951 RP-S12e, RPS12; small subunit ribosomal protein S12e | 1.879717 | 0.0034727 |
| K02262 COX3; cytochrome c oxidase subunit 3 | 1.836880 | 0.0002819 |
| K02865 RP-L10Ae, RPL10A; large subunit ribosomal protein L10Ae | 1.824018 | 0.0057603 |
| K02880 RP-L17e, RPL17; large subunit ribosomal protein L17e | 1.810086 | 0.0046569 |
| K02987 RP-S4e, RPS4; small subunit ribosomal protein S4e | 1.720987 | 0.0023484 |
| K00716 FUT3; galactoside 3(4)-fucosyltransferase 3 | 1.478105 | 0.0092689 |
| K07604 KRT1; type I keratin, acidic | 1.279948 | 0.0058158 |
| K02917 RP-L35Ae, RPL35A; large subunit ribosomal protein L35Ae | 1.029740 | 0.0089774 |
| K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 | -1.138422 | 0.0067601 |
| K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA | -1.226933 | 0.0011742 |
| K04077 groEL, HSPD1; chaperonin GroEL | -1.315605 | 0.0012724 |
| K02835 prfA, MTRF1, MRF1; peptide chain release factor 1 | -1.550880 | 0.0007800 |
| K03060 rpoZ; DNA-directed RNA polymerase subunit omega | -1.588916 | 0.0043101 |
| K01887 RARS, argS; arginyl-tRNA synthetase | -1.705206 | 0.0063117 |
| K02518 infA; translation initiation factor IF-1 | -1.706528 | 0.0030988 |
| K12952 ctpE; cation-transporting P-type ATPase E | -1.804191 | 0.0067174 |
| K03551 ruvB; holliday junction DNA helicase RuvB | -1.814960 | 0.0059714 |
| K03595 era, ERAL1; GTPase | -1.832702 | 0.0073609 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | -1.888459 | 0.0072512 |
| K01897 ACSL, fadD; long-chain acyl-CoA synthetase | -1.893089 | 0.0084500 |
| K02238 comEC; competence protein ComEC | -2.087058 | 0.0011352 |
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | -2.146961 | 0.0077292 |
| K00943 tmk, DTYMK; dTMP kinase | -2.194727 | 0.0016675 |
| K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 | -2.286885 | 0.0054462 |
| K00872 thrB1; homoserine kinase | -2.304481 | 0.0004883 |
| K02909 RP-L31, rpmE; large subunit ribosomal protein L31 | -2.315063 | 0.0041248 |
| K02003 ABC.CD.A; putative ABC transport system ATP-binding protein | -2.437543 | 0.0027193 |
| K03629 recF; DNA replication and repair protein RecF | -2.492020 | 0.0040615 |
| K09206 KLF5; krueppel-like factor 5 | -2.551036 | 0.0013898 |
| K02892 RP-L23, MRPL23, rplW; large subunit ribosomal protein L23 | -2.595122 | 0.0032808 |
| K03097 CSNK2A; casein kinase II subunit alpha | -2.640297 | 0.0002630 |
| K02528 ksgA; 16S rRNA (adenine1518-N6/adenine1519-N6)-dimethyltransferase | -2.657221 | 0.0007924 |
| K01870 IARS, ileS; isoleucyl-tRNA synthetase | -2.676801 | 0.0018858 |
| K01834 PGAM, gpmA; 2,3-bisphosphoglycerate-dependent phosphoglycerate mutase | -2.779467 | 0.0013469 |
| K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 | -2.856197 | 0.0092922 |
| K00226 pyrD; dihydroorotate dehydrogenase (fumarate) | -9.729337 | 0.0000005 |
| K02340 holA; DNA polymerase III subunit delta | -9.769813 | 0.0074585 |
| K19302 bcrC; undecaprenyl-diphosphatase | -9.788827 | 0.0042853 |
| K01714 dapA; 4-hydroxy-tetrahydrodipicolinate synthase | -9.824873 | 0.0068356 |
| K04068 nrdG; anaerobic ribonucleoside-triphosphate reductase activating protein | -9.826740 | 0.0000007 |
| K01002 mdoB; phosphoglycerol transferase | -9.836073 | 0.0000006 |
| K05873 cyaB; adenylate cyclase, class 2 | -9.874195 | 0.0000002 |
| K02890 RP-L22, MRPL22, rplV; large subunit ribosomal protein L22 | -9.895246 | 0.0025241 |
| K19710 E2.7.7.53; ATP adenylyltransferase | -9.896449 | 0.0000006 |
| K01579 panD; aspartate 1-decarboxylase | -9.915735 | 0.0000001 |
| K06885 K06885; uncharacterized protein | -9.923682 | 0.0037099 |
| K06180 rluD; 23S rRNA pseudouridine1911/1915/1917 synthase | -9.960684 | 0.0022266 |
| K01356 lexA; repressor LexA | -10.000666 | 0.0000007 |
| K02662 pilM; type IV pilus assembly protein PilM | -10.002825 | 0.0000004 |
| K09775 K09775; uncharacterized protein | -10.039478 | 0.0000006 |
| K07042 ybeY, yqfG; probable rRNA maturation factor | -10.045636 | 0.0020905 |
| K07009 gatD; lipid II isoglutaminyl synthase (glutamine-hydrolysing) | -10.067533 | 0.0000005 |
| K07082 K07082; UPF0755 protein | -10.074038 | 0.0043652 |
| K03046 rpoC; DNA-directed RNA polymerase subunit beta’ | -10.082929 | 0.0035729 |
| K07462 recJ; single-stranded-DNA-specific exonuclease | -10.084816 | 0.0000007 |
| K10563 mutM, fpg; formamidopyrimidine-DNA glycosylase | -10.116843 | 0.0000001 |
| K13993 HSP20; HSP20 family protein | -10.125501 | 0.0000007 |
| K02653 pilC; type IV pilus assembly protein PilC | -10.140647 | 0.0000005 |
| K00615 E2.2.1.1, tktA, tktB; transketolase | -10.185928 | 0.0025127 |
| K07447 ruvX; putative holliday junction resolvase | -10.238002 | 0.0000000 |
| K07005 K07005; uncharacterized protein | -10.252818 | 0.0000006 |
| K09117 K09117; uncharacterized protein | -10.253138 | 0.0000007 |
| K16302 CNNM; metal transporter CNNM | -10.277650 | 0.0000004 |
| K02992 RP-S7, MRPS7, rpsG; small subunit ribosomal protein S7 | -10.306224 | 0.0038282 |
| K01358 clpP, CLPP; ATP-dependent Clp protease, protease subunit | -10.329657 | 0.0036579 |
| K07027 K07027; glycosyltransferase 2 family protein | -10.331001 | 0.0000003 |
| K09799 K09799; uncharacterized protein | -10.356046 | 0.0000005 |
| K03685 rnc, DROSHA, RNT1; ribonuclease III | -10.375157 | 0.0015344 |
| K03703 uvrC; excinuclease ABC subunit C | -10.396153 | 0.0068530 |
| K01873 VARS, valS; valyl-tRNA synthetase | -10.424692 | 0.0014117 |
| K03070 secA; preprotein translocase subunit SecA | -10.468957 | 0.0037976 |
| K04075 tilS, mesJ; tRNA(Ile)-lysidine synthase | -10.473047 | 0.0000000 |
| K03625 nusB; transcription antitermination protein NusB | -10.502848 | 0.0030047 |
| K00942 E2.7.4.8, gmk; guanylate kinase | -10.529623 | 0.0024473 |
| K02114 ATPF1E, atpC; F-type H+-transporting ATPase subunit epsilon | -10.637286 | 0.0000000 |
| K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 | -10.649774 | 0.0077066 |
| K02967 RP-S2, MRPS2, rpsB; small subunit ribosomal protein S2 | -10.712133 | 0.0009110 |
| K02650 pilA; type IV pilus assembly protein PilA | -10.744343 | 0.0000002 |
| K07284 srtA; sortase A | -10.747371 | 0.0000003 |
| K03470 rnhB; ribonuclease HII | -10.766713 | 0.0004899 |
| K21572 susD; starch-binding outer membrane protein, SusD/RagB family | -10.770900 | 0.0000068 |
| K07652 vicK; two-component system, OmpR family, sensor histidine kinase VicK | -10.929455 | 0.0008861 |
| K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial | -11.093019 | 0.0000002 |
| K03284 corA; magnesium transporter | -11.300507 | 0.0000002 |
| UNGROUPED | -159.319416 | 0.0033425 |
Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.
ss3 = kos.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss3 = ss3[ss3['Saliva_Saliva_euk_p'] < 0.01]
ss3 = ss3.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss3 = pd.concat([ss3.head(n=50), ss3.tail(n=50)])
ss3 = ss3.groupby(by=ss3.index, axis=0).mean()
ss3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K11254 H4; histone H4 | 2.0312789 | 0.0037851 |
| K07374 TUBA; tubulin alpha | 1.6788133 | 0.0060293 |
| K09560 ST13; suppressor of tumorigenicity protein 13 | 1.6686333 | 0.0085426 |
| K07604 KRT1; type I keratin, acidic | 1.6012059 | 0.0026179 |
| K11253 H3; histone H3 | 1.5814691 | 0.0034163 |
| K02870 RP-L12e, RPL12; large subunit ribosomal protein L12e | 1.1022718 | 0.0042198 |
| K03664 smpB; SsrA-binding protein | -0.9274021 | 0.0091598 |
| K02909 RP-L31, rpmE; large subunit ribosomal protein L31 | -1.0914627 | 0.0000497 |
| K02891 RP-L22e, RPL22; large subunit ribosomal protein L22e | -1.0921348 | 0.0075675 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | -1.4474400 | 0.0043794 |
| K03111 ssb; single-strand DNA-binding protein | -1.5397928 | 0.0075431 |
| K09206 KLF5; krueppel-like factor 5 | -2.5510365 | 0.0013898 |
| K08428 GPR139; G protein-coupled receptor 139 | -3.1270607 | 0.0008065 |
| K08859 STK35, PDIK1, CLIK1; serine/threonine kinase 35 | -3.4022291 | 0.0003786 |
| K10269 FBXL3; F-box and leucine-rich repeat protein 3 | -3.4391808 | 0.0020834 |
| K10536 aguA; agmatine deiminase | -3.6186631 | 0.0005001 |
| K04300 SREB3, GPR173; super conserved receptor expressed in brain 3 | -3.6228735 | 0.0033145 |
| K05223 NMB; neuromedin B | -3.7296179 | 0.0057104 |
| K01669 phrB; deoxyribodipyrimidine photo-lyase | -3.7660531 | 0.0015032 |
| K02258 COX11, ctaG; cytochrome c oxidase assembly protein subunit 11 | -3.8012608 | 0.0022219 |
| K02222 SFRP5; secreted frizzled-related protein 5 | -3.8855100 | 0.0011182 |
| K19910 SYT10; synaptotagmin-10 | -3.9182049 | 0.0007119 |
| K04939 KCNMB3; potassium large conductance calcium-activated channel subfamily M beta member 3 | -3.9417163 | 0.0001364 |
| K04969 TRPC6; transient receptor potential cation channel subfamily C member 6 | -3.9536756 | 0.0003509 |
| K17822 DCUN1D1_2; DCN1-like protein 43862 | -4.0010991 | 0.0000262 |
| K19876 STEAP4; metalloreductase STEAP4 | -4.0023695 | 0.0000728 |
| K13155 SNRNP35; U11/U12 small nuclear ribonucleoprotein 35 kDa protein | -4.1129610 | 0.0003233 |
| K22073 IBA57; transferase CAF17, mitochondrial | -4.1232314 | 0.0000738 |
| K16146 pep2; maltokinase | -4.1639256 | 0.0081216 |
| K10861 UDG2, CCNO; uracil-DNA glycosylase 2 | -4.1809700 | 0.0003352 |
| K16586 HAUS3; HAUS augmin-like complex subunit 3 | -4.1937376 | 0.0000252 |
| K16069 ADAM29; disintegrin and metalloproteinase domain-containing protein 29 | -4.2406876 | 0.0000199 |
| K10040 peb1B, glnP, glnM; aspartate/glutamate/glutamine transport system permease protein | -4.2514462 | 0.0059826 |
| K00865 glxK, garK; glycerate 2-kinase | -4.2791696 | 0.0014200 |
| K10476 KBTBD11; kelch repeat and BTB domain-containing protein 11 | -4.2847677 | 0.0006106 |
| K02036 pstB; phosphate transport system ATP-binding protein | -4.3169584 | 0.0007591 |
| K05307 THTPA; thiamine-triphosphatase | -4.3342036 | 0.0003188 |
| K09039 NFE2; nuclear factor erythroid 2 | -4.3363638 | 0.0000797 |
| K04667 INHBA; inhibin beta A chain | -4.4242494 | 0.0000395 |
| K15102 SLC25A3, PHC, PIC; solute carrier family 25 (mitochondrial phosphate transporter), member 3 | -4.4830752 | 0.0017722 |
| K10712 ADO; cysteamine dioxygenase | -4.5607344 | 0.0000705 |
| K02160 accB, bccP; acetyl-CoA carboxylase biotin carboxyl carrier protein | -4.5697394 | 0.0003862 |
| K03892 arsR; ArsR family transcriptional regulator, arsenate/arsenite/antimonite-responsive transcriptional repressor | -4.5804169 | 0.0001169 |
| K01227 E3.2.1.96; mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase | -4.6288951 | 0.0027745 |
| K09997 artI; arginine transport system substrate-binding protein | -4.6926176 | 0.0030980 |
| K08206 SLC22A9S; MFS transporter, OCT family, solute carrier family 22 (organic anion/cation transporter), member 9/10/19/24/25 | -4.7049019 | 0.0000053 |
| K04517 tyrA2; prephenate dehydrogenase | -4.7425379 | 0.0004995 |
| K17344 ROM1, TSPAN23; rod outer segment membrane protein 1 | -4.7861964 | 0.0002944 |
| K00419 QCR9, UCRC; ubiquinol-cytochrome c reductase subunit 9 | -4.7999481 | 0.0075587 |
| K02153 ATPeV0E, ATP6H; V-type H+-transporting ATPase subunit e | -4.8363638 | 0.0006969 |
| K02199 ccmG, dsbE; cytochrome c biogenesis protein CcmG, thiol:disulfide interchange protein DsbE | -5.1522058 | 0.0000403 |
| K09808 lolC_E; lipoprotein-releasing system permease protein | -5.1884507 | 0.0001386 |
| K22463 afsA; 2-oxo-3-(phosphooxy)propyl 3-oxoalkanoate synthase | -5.1949011 | 0.0000211 |
| K03592 pmbA; PmbA protein | -5.2291259 | 0.0000043 |
| K02445 glpT; MFS transporter, OPA family, glycerol-3-phosphate transporter | -5.2370393 | 0.0046875 |
| K00627 DLAT, aceF, pdhC; pyruvate dehydrogenase E2 component (dihydrolipoamide acetyltransferase) | -5.2442432 | 0.0000001 |
| K14443 TOB; protein Tob/BTG | -5.2481079 | 0.0070673 |
| K16859 PGF; placenta growth factor | -5.2517623 | 0.0000307 |
| K00417 QCR7, UQCRB; ubiquinol-cytochrome c reductase subunit 7 | -5.3392869 | 0.0011680 |
| K19804 lapB; lipopolysaccharide assembly protein B | -5.4020946 | 0.0000521 |
| K01193 INV, sacA; beta-fructofuranosidase | -5.4035020 | 0.0000000 |
| K02549 menC; O-succinylbenzoate synthase | -5.4195231 | 0.0000451 |
| K07407 E3.2.1.22B, galA, rafA; alpha-galactosidase | -5.4935953 | 0.0000159 |
| K13671 K13671; alpha-1,2-mannosyltransferase | -5.5403114 | 0.0000658 |
| K21828 arcR; CRP/FNR family transcriptional regulator, arginine deiminase pathway regulator | -5.7368659 | 0.0017124 |
| K07386 pepO; putative endopeptidase | -5.7453284 | 0.0002614 |
| K03784 deoD; purine-nucleoside phosphorylase | -5.7791686 | 0.0000081 |
| K13628 iscA; iron-sulfur cluster assembly protein | -5.7889425 | 0.0095671 |
| K10541 mglC; methyl-galactoside transport system permease protein | -5.9340829 | 0.0000001 |
| K03695 clpB; ATP-dependent Clp protease ATP-binding subunit ClpB | -5.9491229 | 0.0000008 |
| K02501 hisH; imidazole glycerol-phosphate synthase subunit HisH | -6.1148747 | 0.0000009 |
| K20533 trbI; type IV secretion system protein TrbI | -6.1158474 | 0.0000011 |
| K03074 secF; preprotein translocase subunit SecF | -6.1206397 | 0.0000000 |
| K03290 nanT; MFS transporter, SHS family, sialic acid transporter | -6.1237791 | 0.0005453 |
| K00368 nirK; nitrite reductase (NO-forming) | -6.1604933 | 0.0000245 |
| K07220 K07220; uncharacterized protein | -6.2003992 | 0.0003796 |
| K22319 oleC; olefin beta-lactone synthetase | -6.3621904 | 0.0000700 |
| K19180 tll; dTDP-6-deoxy-L-talose 4-dehydrogenase (NAD+) | -6.3671514 | 0.0098966 |
| K03179 ubiA; 4-hydroxybenzoate polyprenyltransferase | -6.4147945 | 0.0000048 |
| K02770 PTS-Fru-EIIC, fruA; PTS system, fructose-specific IIC component | -6.4527764 | 0.0000008 |
| K02769 PTS-Fru-EIIB, fruA; PTS system, fructose-specific IIB component | -6.4527764 | 0.0000008 |
| K04083 hslO; molecular chaperone Hsp33 | -6.4745551 | 0.0000024 |
| K00609 pyrB, PYR2; aspartate carbamoyltransferase catalytic subunit | -6.4853801 | 0.0033392 |
| K03747 smg; Smg protein | -6.6444347 | 0.0000040 |
| K01673 cynT, can; carbonic anhydrase | -6.6632026 | 0.0000000 |
| K04754 mlaA, vacJ; phospholipid-binding lipoprotein MlaA | -6.7234948 | 0.0000001 |
| K07670 mtrA; two-component system, OmpR family, response regulator MtrA | -6.7331937 | 0.0000000 |
| K11189 PTS-HPR; phosphocarrier protein | -6.8845382 | 0.0045279 |
| K01129 dgt; dGTPase | -6.9440136 | 0.0000490 |
| K02440 GLPF; glycerol uptake facilitator protein | -6.9930044 | 0.0000030 |
| K03072 secD; preprotein translocase subunit SecD | -7.1830725 | 0.0000054 |
| K02033 ABC.PE.P; peptide/nickel transport system permease protein | -7.2629446 | 0.0095901 |
| K05522 nei; endonuclease VIII | -7.2726538 | 0.0000010 |
| K02111 ATPF1A, atpA; F-type H+/Na+-transporting ATPase subunit alpha | -7.4788154 | 0.0091373 |
| K11906 vasD, lip; type VI secretion system protein VasD | -7.5285192 | 0.0000000 |
| K06320 cgeB; spore maturation protein CgeB | -7.7614668 | 0.0000014 |
| K21237 gp2; Phi29virus DNA polymerase | -7.8498310 | 0.0000867 |
| K04488 iscU, nifU; nitrogen fixation protein NifU and related proteins | -7.9371426 | 0.0000000 |
| K21449 ata, sadA, emaA; trimeric autotransporter adhesin | -8.0468978 | 0.0000042 |
| K07461 K07461; putative endonuclease | -8.5351052 | 0.0000009 |
Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.
bbe3 = kos.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe3 = bbe3[bbe3['Buccal_Buccal_euk_p'] < 0.01]
bbe3 = bbe3.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe3 = pd.concat([bbe3.head(n=50), bbe3.tail(n=50)])
bbe3 = bbe3.groupby(by=bbe3.index, axis=0).mean()
bbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K02256 COX1; cytochrome c oxidase subunit 1 | -1.679128 | 0.0029860 |
| K02262 COX3; cytochrome c oxidase subunit 3 | -1.713256 | 0.0027708 |
| K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 | -1.775227 | 0.0030581 |
| K02261 COX2; cytochrome c oxidase subunit 2 | -2.557425 | 0.0005212 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | -2.604483 | 0.0067667 |
| K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 | -2.679944 | 0.0039535 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -2.720035 | 0.0002712 |
| K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 | -3.128334 | 0.0002943 |
| K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L | -3.710827 | 0.0004718 |
| K01197 hya; hyaluronoglucosaminidase | -5.554774 | 0.0000297 |
| K08051 PLEKHA8, FAPP2; pleckstrin homology domain containing family A member 8 | -5.756705 | 0.0000020 |
| K00716 FUT3; galactoside 3(4)-fucosyltransferase 3 | -6.041298 | 0.0000008 |
| K04181 CCR6, CD196; C-C chemokine receptor type 6 | -6.569081 | 0.0000001 |
| K00161 PDHA, pdhA; pyruvate dehydrogenase E1 component alpha subunit | -6.596322 | 0.0000039 |
| K00974 cca; tRNA nucleotidyltransferase (CCA-adding enzyme) | -6.613553 | 0.0000080 |
| K03402 argR, ahrC; transcriptional regulator of arginine metabolism | -6.728270 | 0.0000033 |
| K05787 hupA; DNA-binding protein HU-alpha | -6.800914 | 0.0000039 |
| K16362 FLRT; leucine-rich repeat transmembrane protein FLRT | -6.987625 | 0.0000000 |
| K03531 ftsZ; cell division protein FtsZ | -7.047213 | 0.0000029 |
| K00549 metE; 5-methyltetrahydropteroyltriglutamate–homocysteine methyltransferase | -7.057557 | 0.0001285 |
| K02012 afuA, fbpA; iron(III) transport system substrate-binding protein | -7.066580 | 0.0000956 |
| K04047 dps; starvation-inducible DNA-binding protein | -7.099427 | 0.0000053 |
| K15051 endA; DNA-entry nuclease | -7.139538 | 0.0000011 |
| K09501 SERPINH1, HSP47; serpin peptidase inhibitor, clade H, member 1 | -7.254149 | 0.0000014 |
| K01870 IARS, ileS; isoleucyl-tRNA synthetase | -7.323407 | 0.0000020 |
| K04763 xerD; integrase/recombinase XerD | -7.415967 | 0.0000014 |
| K02892 RP-L23, MRPL23, rplW; large subunit ribosomal protein L23 | -7.435998 | 0.0000041 |
| K01817 trpF; phosphoribosylanthranilate isomerase | -7.491139 | 0.0000306 |
| K01520 dut, DUT; dUTP pyrophosphatase | -7.544095 | 0.0000759 |
| K03327 TC.MATE, SLC47A, norM, mdtK, dinF; multidrug resistance protein, MATE family | -7.566111 | 0.0000001 |
| K09861 K09861; uncharacterized protein | -7.580484 | 0.0000002 |
| K00981 E2.7.7.41, CDS1, CDS2, cdsA; phosphatidate cytidylyltransferase | -7.645951 | 0.0002095 |
| K03271 gmhA, lpcA; D-sedoheptulose 7-phosphate isomerase | -7.657154 | 0.0057884 |
| K02238 comEC; competence protein ComEC | -7.729226 | 0.0000000 |
| K13571 pafA; proteasome accessory factor A | -7.746068 | 0.0000001 |
| K01821 praC, xylH; 4-oxalocrotonate tautomerase | -7.799769 | 0.0000008 |
| K00886 ppgK; polyphosphate glucokinase | -7.804302 | 0.0000006 |
| K01243 mtnN, mtn, pfs; adenosylhomocysteine nucleosidase | -7.825905 | 0.0000006 |
| K02835 prfA, MTRF1, MRF1; peptide chain release factor 1 | -7.846768 | 0.0000000 |
| K07461 K07461; putative endonuclease | -7.915114 | 0.0000077 |
| K00241 sdhC, frdC; succinate dehydrogenase / fumarate reductase, cytochrome b subunit | -7.920181 | 0.0000048 |
| K07768 senX3; two-component system, OmpR family, sensor histidine kinase SenX3 | -7.930145 | 0.0000068 |
| K00656 E2.3.1.54, pflD; formate C-acetyltransferase | -7.940533 | 0.0000001 |
| K00616 E2.2.1.2, talA, talB; transaldolase | -7.954862 | 0.0000002 |
| K02033 ABC.PE.P; peptide/nickel transport system permease protein | -8.022345 | 0.0000002 |
| K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 | -8.025169 | 0.0000121 |
| K22463 afsA; 2-oxo-3-(phosphooxy)propyl 3-oxoalkanoate synthase | -8.045847 | 0.0000022 |
| K00872 thrB1; homoserine kinase | -8.098105 | 0.0000000 |
| K03595 era, ERAL1; GTPase | -8.142956 | 0.0000002 |
| K06855 MAPK4_6; mitogen-activated protein kinase 43986 | -8.149037 | 0.0000042 |
| K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 | -8.154249 | 0.0000000 |
| K03565 recX; regulatory protein | -8.197517 | 0.0000001 |
| K01939 purA, ADSS; adenylosuccinate synthase | -8.204780 | 0.0000023 |
| K02342 dnaQ; DNA polymerase III subunit epsilon | -8.212993 | 0.0000005 |
| K02770 PTS-Fru-EIIC, fruA; PTS system, fructose-specific IIC component | -8.224142 | 0.0000003 |
| K02769 PTS-Fru-EIIB, fruA; PTS system, fructose-specific IIB component | -8.224142 | 0.0000003 |
| K00936 pdtaS; two-component system, sensor histidine kinase PdtaS | -8.330524 | 0.0000005 |
| K11754 folC; dihydrofolate synthase / folylpolyglutamate synthase | -8.341740 | 0.0000015 |
| K03551 ruvB; holliday junction DNA helicase RuvB | -8.360546 | 0.0000008 |
| K01129 dgt; dGTPase | -8.375383 | 0.0000074 |
| K05364 | -8.388278 | 0.0000025 |
| K11733 lysP; lysine-specific permease | -8.446310 | 0.0000009 |
| K00766 trpD; anthranilate phosphoribosyltransferase | -8.488092 | 0.0000000 |
| K02768 PTS-Fru-EIIA, fruB; PTS system, fructose-specific IIA component | -8.537095 | 0.0000001 |
| K00567 ogt, MGMT; methylated-DNA- | -8.583419 | 0.0026064 |
| K00728 POMT, pmt; dolichyl-phosphate-mannose-protein mannosyltransferase | -8.634135 | 0.0000113 |
| K12019 TRIM43S; tripartite motif-containing protein 43/48/49/64/77 | -8.640636 | 0.0000002 |
| K01673 cynT, can; carbonic anhydrase | -8.648944 | 0.0000010 |
| K01256 pepN; aminopeptidase N | -8.653422 | 0.0000075 |
| K02358 tuf, TUFM; elongation factor Tu | -8.791193 | 0.0058415 |
| K03308 TC.NSS; neurotransmitter:Na+ symporter, NSS family | -8.836290 | 0.0044429 |
| K07442 TRM61, GCD14; tRNA (adenine57-N1/adenine58-N1)-methyltransferase catalytic subunit | -8.916610 | 0.0053757 |
| K04096 smf; DNA processing protein | -9.132566 | 0.0000008 |
| K01923 purC; phosphoribosylaminoimidazole-succinocarboxamide synthase | -9.188886 | 0.0011003 |
| K03536 rnpA; ribonuclease P protein component | -9.297534 | 0.0000043 |
| K03559 exbD; biopolymer transport protein ExbD | -9.402267 | 0.0000000 |
| K00059 fabG, OAR1; 3-oxoacyl- | -9.475675 | 0.0000004 |
| K01754 E4.3.1.19, ilvA, tdcB; threonine dehydratase | -9.534632 | 0.0051595 |
| K00088 IMPDH, guaB; IMP dehydrogenase | -9.600835 | 0.0000000 |
| K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA | -9.684566 | 0.0000000 |
| UNGROUPED | -337.103078 | 0.0070408 |
Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.
blbe3 = kos.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe3 = blbe3[blbe3['Blood_Saliva_euk_p'] < 0.01]
blbe3 = blbe3.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe3 = pd.concat([blbe3.head(n=50), blbe3.tail(n=50)])
blbe3 = blbe3.groupby(by=blbe3.index, axis=0).mean()
blbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 | 11.341959 | 0.0000001 |
| K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 | 10.950425 | 0.0000000 |
| K02518 infA; translation initiation factor IF-1 | 10.909749 | 0.0000005 |
| K01992 ABC-2.P; ABC-2 type transport system permease protein | 10.780916 | 0.0000174 |
| K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 | 10.638615 | 0.0000040 |
| K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 | 10.367308 | 0.0000000 |
| K02428 rdgB; XTP/dITP diphosphohydrolase | 10.320932 | 0.0000056 |
| K01662 dxs; 1-deoxy-D-xylulose-5-phosphate synthase | 10.115384 | 0.0000006 |
| K01928 murE; UDP-N-acetylmuramoyl-L-alanyl-D-glutamate–2,6-diaminopimelate ligase | 10.092058 | 0.0000040 |
| K02916 RP-L35, MRPL35, rpmI; large subunit ribosomal protein L35 | 10.055988 | 0.0000001 |
| K02909 RP-L31, rpmE; large subunit ribosomal protein L31 | 10.001858 | 0.0000000 |
| K03551 ruvB; holliday junction DNA helicase RuvB | 9.962025 | 0.0000002 |
| K02988 RP-S5, MRPS5, rpsE; small subunit ribosomal protein S5 | 9.939578 | 0.0000007 |
| K01358 clpP, CLPP; ATP-dependent Clp protease, protease subunit | 9.901599 | 0.0000043 |
| K03977 engA, der; GTPase | 9.881898 | 0.0000000 |
| K03629 recF; DNA replication and repair protein RecF | 9.843115 | 0.0000002 |
| K06891 clpS; ATP-dependent Clp protease adaptor protein ClpS | 9.816164 | 0.0000343 |
| K02358 tuf, TUFM; elongation factor Tu | 9.807406 | 0.0000016 |
| K02835 prfA, MTRF1, MRF1; peptide chain release factor 1 | 9.766626 | 0.0000000 |
| K01885 EARS, gltX; glutamyl-tRNA synthetase | 9.722861 | 0.0000611 |
| K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 | 9.716704 | 0.0000002 |
| K01579 panD; aspartate 1-decarboxylase | 9.675909 | 0.0000000 |
| K02967 RP-S2, MRPS2, rpsB; small subunit ribosomal protein S2 | 9.670229 | 0.0000001 |
| K00942 E2.7.4.8, gmk; guanylate kinase | 9.665516 | 0.0000101 |
| K02337 dnaE; DNA polymerase III subunit alpha | 9.637137 | 0.0000000 |
| K02108 ATPF0A, atpB; F-type H+-transporting ATPase subunit a | 9.604602 | 0.0000009 |
| K03073 secE; preprotein translocase subunit SecE | 9.598304 | 0.0000013 |
| K03470 rnhB; ribonuclease HII | 9.587183 | 0.0000142 |
| K03711 fur, zur, furB; Fur family transcriptional regulator, ferric uptake regulator | 9.573168 | 0.0000004 |
| K01495 GCH1, folE; GTP cyclohydrolase IA | 9.543082 | 0.0000413 |
| K03070 secA; preprotein translocase subunit SecA | 9.535647 | 0.0000022 |
| K02867 RP-L11, MRPL11, rplK; large subunit ribosomal protein L11 | 9.450057 | 0.0000021 |
| K03811 pnuC; nicotinamide mononucleotide transporter | 9.449862 | 0.0000002 |
| K03664 smpB; SsrA-binding protein | 9.441698 | 0.0000000 |
| K02563 murG; UDP-N-acetylglucosamine–N-acetylmuramyl-(pentapeptide) pyrophosphoryl-undecaprenol N-acetylglucosamine transferase | 9.308290 | 0.0000006 |
| K00773 tgt, QTRT1; queuine tRNA-ribosyltransferase | 9.279428 | 0.0000012 |
| K02906 RP-L3, MRPL3, rplC; large subunit ribosomal protein L3 | 9.243950 | 0.0000001 |
| K02914 RP-L34, MRPL34, rpmH; large subunit ribosomal protein L34 | 9.232331 | 0.0000029 |
| K12952 ctpE; cation-transporting P-type ATPase E | 9.229432 | 0.0000010 |
| K00075 murB; UDP-N-acetylmuramate dehydrogenase | 9.228715 | 0.0000005 |
| K02338 dnaN; DNA polymerase III subunit beta | 9.198856 | 0.0000054 |
| K02874 RP-L14, MRPL14, rplN; large subunit ribosomal protein L14 | 9.184765 | 0.0000000 |
| K00963 UGP2, galU, galF; UTP–glucose-1-phosphate uridylyltransferase | 9.177459 | 0.0000001 |
| K01091 gph; phosphoglycolate phosphatase | 9.109656 | 0.0000004 |
| K03624 greA; transcription elongation factor GreA | 9.104214 | 0.0000001 |
| K02601 nusG; transcription termination/antitermination protein NusG | 9.102247 | 0.0000004 |
| K19710 E2.7.7.53; ATP adenylyltransferase | 9.095833 | 0.0000027 |
| K00765 hisG; ATP phosphoribosyltransferase | 9.092277 | 0.0000000 |
| K02115 ATPF1G, atpG; F-type H+-transporting ATPase subunit gamma | 9.081642 | 0.0000002 |
| K03116 tatA; sec-independent protein translocase protein TatA | 9.072573 | 0.0000218 |
| K01704 leuD, IPMI-S; 3-isopropylmalate/(R)-2-methylmalate dehydratase small subunit | 8.410856 | 0.0001828 |
| K00943 tmk, DTYMK; dTMP kinase | 8.381361 | 0.0000023 |
| K18893 vcaM; ATP-binding cassette, subfamily B, multidrug efflux pump | 8.358762 | 0.0000011 |
| K08396 MRGPRX; Mas-related G protein-coupled receptor member X | 8.357014 | 0.0000000 |
| K13643 iscR; Rrf2 family transcriptional regulator, iron-sulfur cluster assembly transcription factor | 8.341677 | 0.0000161 |
| K01883 CARS, cysS; cysteinyl-tRNA synthetase | 8.314948 | 0.0000003 |
| K02535 lpxC; UDP-3-O- | 8.314917 | 0.0000095 |
| K01255 CARP, pepA; leucyl aminopeptidase | 8.305561 | 0.0000000 |
| K09903 pyrH; uridylate kinase | 8.299529 | 0.0000363 |
| K00610 pyrI; aspartate carbamoyltransferase regulatory subunit | 8.284502 | 0.0000060 |
| K01687 ilvD; dihydroxy-acid dehydratase | 8.248209 | 0.0000257 |
| K13678 cpoA; 1,2-diacylglycerol-3-alpha-glucose alpha-1,2-galactosyltransferase | 8.207438 | 0.0000012 |
| K01657 trpE; anthranilate synthase component I | 8.184915 | 0.0000137 |
| K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 | 8.184273 | 0.0000001 |
| K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 | 8.168320 | 0.0000108 |
| K01915 glnA, GLUL; glutamine synthetase | 8.163729 | 0.0000003 |
| K07224 efeO; iron uptake system component EfeO | 8.116363 | 0.0000387 |
| K03530 hupB; DNA-binding protein HU-beta | 8.060273 | 0.0000157 |
| K02823 pyrDII; dihydroorotate dehydrogenase electron transfer subunit | 8.023014 | 0.0000302 |
| K03594 bfr; bacterioferritin | 8.011747 | 0.0002121 |
| K01924 murC; UDP-N-acetylmuramate–alanine ligase | 8.010583 | 0.0000004 |
| K01874 MARS, metG; methionyl-tRNA synthetase | 8.004532 | 0.0000001 |
| K00945 cmk; CMP/dCMP kinase | 7.994847 | 0.0000011 |
| K00341 nuoL; NADH-quinone oxidoreductase subunit L | 7.974158 | 0.0000295 |
| K00948 PRPS, prsA; ribose-phosphate pyrophosphokinase | 7.958184 | 0.0000053 |
| K09529 DNAJC9; DnaJ homolog subfamily C member 9 | 7.946732 | 0.0000002 |
| K03484 scrR; LacI family transcriptional regulator, sucrose operon repressor | 7.921266 | 0.0000126 |
| K00260 gudB, rocG; glutamate dehydrogenase | 7.905971 | 0.0000493 |
| K01595 ppc; phosphoenolpyruvate carboxylase | 7.900376 | 0.0000003 |
| K02662 pilM; type IV pilus assembly protein PilM | 7.851796 | 0.0000046 |
| K00760 hprT, hpt, HPRT1; hypoxanthine phosphoribosyltransferase | 7.827778 | 0.0000024 |
| K09560 ST13; suppressor of tumorigenicity protein 13 | 7.700560 | 0.0000007 |
| K00376 nosZ; nitrous-oxide reductase | 7.670794 | 0.0000014 |
| K02357 tsf, TSFM; elongation factor Ts | 7.634399 | 0.0000053 |
| K19157 yafQ; mRNA interferase YafQ | 7.622868 | 0.0000014 |
| K03602 xseB; exodeoxyribonuclease VII small subunit | 7.591479 | 0.0000044 |
| K16197 YWHAB_Q_Z; 37694 protein beta/theta/zeta | 7.279191 | 0.0000000 |
| K10580 UBE2N, BLU, UBC13; ubiquitin-conjugating enzyme E2 N | 7.204063 | 0.0000023 |
| K20651 ARHGAP42, GRAF3; Rho GTPase-activating protein 42 | 7.204023 | 0.0000153 |
| K03257 EIF4A; translation initiation factor 4A | 7.166795 | 0.0000018 |
| K08363 merT; mercuric ion transport protein | 6.963124 | 0.0002421 |
| K19879 TCAP; telethonin | 6.621062 | 0.0000314 |
| K00236 SDHC, SDH3; succinate dehydrogenase (ubiquinone) cytochrome b560 subunit | 6.441310 | 0.0000309 |
| K11097 SNRPE, SME; small nuclear ribonucleoprotein E | 6.299610 | 0.0000624 |
| K02975 RP-S25e, RPS25; small subunit ribosomal protein S25e | 5.876647 | 0.0000039 |
| K04393 CDC42; cell division control protein 42 | 5.856245 | 0.0000933 |
| K04552 UBE2L3, UBCH7; ubiquitin-conjugating enzyme E2 L3 | 5.207358 | 0.0002249 |
| K02256 COX1; cytochrome c oxidase subunit 1 | -1.171254 | 0.0058121 |
| K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 | -1.745828 | 0.0017884 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -1.892856 | 0.0068414 |
Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.
blse3 = kos.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse3 = blse3[blse3['Blood_Buccal_euk_p'] < 0.01]
blse3 = blse3.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse3 = pd.concat([blse3.head(n=50), blse.tail(n=50)])
blse3 = blse3.groupby(by=blse3.index, axis=0).mean()
blse3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse3 %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "600px", height = "400px")
| Log2 fold change | p | |
|---|---|---|
| K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 | 9.3155876 | 0.0000046 |
| K02934 RP-L6e, RPL6; large subunit ribosomal protein L6e | 9.1316569 | 0.0000003 |
| K19468 KCNJ18; potassium inwardly-rectifying channel subfamily J member 18 | 8.1018621 | 0.0000096 |
| K03257 EIF4A; translation initiation factor 4A | 7.9018195 | 0.0000035 |
| K03062 PSMC1, RPT2; 26S proteasome regulatory subunit T2 | 7.8222486 | 0.0000030 |
| K11586 CBX3, HP1G; chromobox protein 3 | 7.5044275 | 0.0000008 |
| K03284 corA; magnesium transporter | 7.1522003 | 0.0001015 |
| K01661 menB; naphthoate synthase | 7.1493059 | 0.0000647 |
| K01647 CS, gltA; citrate synthase | 7.1195886 | 0.0000067 |
| K18928 lldE; L-lactate dehydrogenase complex protein LldE | 7.0983073 | 0.0000061 |
| K12757 MYL12; myosin regulatory light chain 12 | 7.0955579 | 0.0000007 |
| K05840 DRD5; dopamine receptor D5 | 7.0636557 | 0.0000000 |
| K08857 NEK1_4_5; NIMA (never in mitosis gene a)-related kinase 38443 | 7.0589315 | 0.0000035 |
| K03466 ftsK, spoIIIE; DNA segregation ATPase FtsK/SpoIIIE, S-DNA-T family | 7.0565895 | 0.0008980 |
| K03960 NDUFB4; NADH dehydrogenase (ubiquinone) 1 beta subcomplex subunit 4 | 7.0253055 | 0.0000000 |
| K03639 moaA, CNX2; GTP 3’,8-cyclase | 7.0054202 | 0.0000799 |
| K00782 lldG; L-lactate dehydrogenase complex protein LldG | 7.0019641 | 0.0000004 |
| K14455 GOT2; aspartate aminotransferase, mitochondrial | 6.9945243 | 0.0000001 |
| K03629 recF; DNA replication and repair protein RecF | 6.9883502 | 0.0000612 |
| K00653 CDY; chromodomain protein Y | 6.9102327 | 0.0000007 |
| K05786 rarD; chloramphenicol-sensitive protein RarD | 6.9046115 | 0.0000255 |
| K00371 narH, narY, nxrB; nitrate reductase / nitrite oxidoreductase, beta subunit | 6.8665611 | 0.0001685 |
| K03547 sbcD, mre11; DNA repair protein SbcD/Mre11 | 6.8586688 | 0.0000767 |
| K02342 dnaQ; DNA polymerase III subunit epsilon | 6.7129347 | 0.0000956 |
| K00525 E1.17.4.1A, nrdA, nrdE; ribonucleoside-diphosphate reductase alpha chain | 6.7013656 | 0.0000661 |
| K17455 FSCN2; fascin2 | 6.6408974 | 0.0000088 |
| K00240 sdhB, frdB; succinate dehydrogenase / fumarate reductase, iron-sulfur subunit | 6.6270061 | 0.0013377 |
| K10343 SPSB1_4, SSB1, SSB4; SPRY domain-containing SOCS box protein 43922 | 6.5750024 | 0.0000001 |
| K03723 mfd; transcription-repair coupling factor (superfamily II helicase) | 6.5721780 | 0.0000566 |
| K01778 dapF; diaminopimelate epimerase | 6.5471812 | 0.0000360 |
| K01207 nagZ; beta-N-acetylhexosaminidase | 6.4067020 | 0.0000378 |
| K01652 E2.2.1.6L, ilvB, ilvG, ilvI; acetolactate synthase I/II/III large subunit | 6.3920399 | 0.0007322 |
| K06911 K06911; uncharacterized protein | 6.3133922 | 0.0000030 |
| K02858 ribB, RIB3; 3,4-dihydroxy 2-butanone 4-phosphate synthase | 6.3106401 | 0.0003793 |
| K03527 ispH, lytB; 4-hydroxy-3-methylbut-2-en-1-yl diphosphate reductase | 6.1477167 | 0.0002274 |
| K01911 menE; O-succinylbenzoic acid—CoA ligase | 6.1373608 | 0.0000285 |
| K01744 aspA; aspartate ammonia-lyase | 6.0818258 | 0.0002584 |
| K00099 dxr; 1-deoxy-D-xylulose-5-phosphate reductoisomerase | 5.9628220 | 0.0004205 |
| K04875 KCNA2, KV1.2; potassium voltage-gated channel Shaker-related subfamily A member 2 | 5.9414027 | 0.0000049 |
| K01610 E4.1.1.49, pckA; phosphoenolpyruvate carboxykinase (ATP) | 5.9186660 | 0.0000373 |
| K12755 MYL9; myosin regulatory light chain 9 | 5.8986788 | 0.0000010 |
| K03319 TC.DASS; divalent anion:Na+ symporter, DASS family | 5.8744409 | 0.0000259 |
| K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 | 5.7928033 | 0.0000036 |
| K02517 lpxL, htrB; Kdo2-lipid IVA lauroyltransferase/acyltransferase | 5.7843811 | 0.0001123 |
| K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 | 5.7651144 | 0.0000040 |
| K00831 serC, PSAT1; phosphoserine aminotransferase | 5.7394312 | 0.0000028 |
| K02930 RP-L4e, RPL4; large subunit ribosomal protein L4e | 5.6662498 | 0.0000003 |
| K08301 rng, cafA; ribonuclease G | 5.6512400 | 0.0000924 |
| K06281 hyaB, hybC; hydrogenase large subunit | 5.4295802 | 0.0001948 |
| K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 | 5.4132320 | 0.0000080 |
| K03546 sbcC, rad50; DNA repair protein SbcC/Rad50 | 5.2338079 | 0.0017231 |
| K16092 btuB; vitamin B12 transporter | 4.7102953 | 0.0017984 |
| K16495 PCDHGA; protocadherin gamma subfamily A | 4.5153796 | 0.0000049 |
| K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase | 1.7257634 | 0.0057157 |
| K06478 PTPRC, CD45; receptor-type tyrosine-protein phosphatase C | -0.3261990 | 0.0020246 |
| K00522 FTH1; ferritin heavy chain | -0.9651333 | 0.0070830 |
| K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 | -1.3794228 | 0.0062609 |